123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646 |
- ///////////////////////////////////////////////////////////////////////////////
- //
- // The MIT License (MIT)
- //
- // Copyright (c) Crossbar.io Technologies GmbH
- //
- // Permission is hereby granted, free of charge, to any person obtaining a copy
- // of this software and associated documentation files (the "Software"), to deal
- // in the Software without restriction, including without limitation the rights
- // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- // copies of the Software, and to permit persons to whom the Software is
- // furnished to do so, subject to the following conditions:
- //
- // The above copyright notice and this permission notice shall be included in
- // all copies or substantial portions of the Software.
- //
- // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- // THE SOFTWARE.
- //
- ///////////////////////////////////////////////////////////////////////////////
-
- #include <stdlib.h>
- #include <stdint.h>
-
- // http://stackoverflow.com/questions/11228855/header-files-for-simd-intrinsics
- #include <x86intrin.h>
-
-
// Terminal DFA states used by the validators in this file.
//
// UTF8_ACCEPT: the input seen so far is valid and ends on a code point.
// UTF8_REJECT: the input seen so far is invalid.
//
#define UTF8_ACCEPT   0
#define UTF8_REJECT   1
-
-
// State of one incremental UTF-8 validator instance.
//
typedef struct {
   // Index counters; set to (size_t) -1 on reset. They are not updated by
   // the validation functions visible in this file.
   size_t   current_index;
   size_t   total_index;

   // Current DFA state, carried over between validation calls.
   int      state;

   // Selected implementation (one of the UTF8_VALIDATOR_* ids).
   int      impl;
} utf8_validator_t;
-
-
// Implementation ids accepted by nvx_utf8vld_set_impl().
//
// UTF8_VALIDATOR_OPTIMAL (0) asks for the best implementation available in
// this build; the remaining ids name one specific implementation.
//
#define UTF8_VALIDATOR_OPTIMAL        0
#define UTF8_VALIDATOR_TABLE_DFA      1
#define UTF8_VALIDATOR_UNROLLED_DFA   2
#define UTF8_VALIDATOR_SSE2_DFA       3
#define UTF8_VALIDATOR_SSE41_DFA      4
-
-
- int nvx_utf8vld_get_impl (void* utf8vld) {
- utf8_validator_t* vld = (utf8_validator_t*) utf8vld;
-
- return vld->impl;
- }
-
- int nvx_utf8vld_set_impl (void* utf8vld, int impl) {
- utf8_validator_t* vld = (utf8_validator_t*) utf8vld;
-
- if (impl) {
- // set requested implementation
- //
- #ifndef __SSE4_1__
- # ifdef __SSE2__
- if (impl <= UTF8_VALIDATOR_SSE2_DFA) {
- vld->impl = impl;
- }
- # else
- if (impl <= UTF8_VALIDATOR_UNROLLED_DFA) {
- vld->impl = impl;
- }
- # endif
- #else
- if (impl <= UTF8_VALIDATOR_SSE41_DFA) {
- vld->impl = impl;
- }
- #endif
-
- } else {
- // set optimal implementation
- //
- #ifndef __SSE4_1__
- # ifdef __SSE2__
- vld->impl = UTF8_VALIDATOR_SSE2_DFA;
- # else
- vld->impl = UTF8_VALIDATOR_UNROLLED_DFA;
- # endif
- #else
- vld->impl = UTF8_VALIDATOR_SSE41_DFA;
- #endif
-
- }
- return vld->impl;
- }
-
-
- void nvx_utf8vld_reset (void* utf8vld) {
- utf8_validator_t* vld = (utf8_validator_t*) utf8vld;
-
- vld->state = 0;
- vld->current_index = -1;
- vld->total_index = -1;
- }
-
-
- void* nvx_utf8vld_new () {
- void* p = malloc(sizeof(utf8_validator_t));
- nvx_utf8vld_reset(p);
- nvx_utf8vld_set_impl(p, 0);
- return p;
- }
-
-
// Release a validator. The pointer is handed straight to free(), so NULL is
// accepted and ignored.
//
void nvx_utf8vld_free (void* utf8vld)
{
   free(utf8vld);
}
-
-
// table-driven DFA from http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
//
// Layout (400 entries):
//   [0   .. 255]  octet value -> character class (0..11)
//   [256 .. 399]  transition table, 16 entries per state (states 0..8):
//                 next_state = table[256 + state * 16 + character_class]
//
static const uint8_t UTF8VALIDATOR_DFA[] __attribute__((aligned(64))) =
{
   // octet -> character class
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00..0f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 10..1f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20..2f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 30..3f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 40..4f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 50..5f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 60..6f
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 70..7f
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 80..8f
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 90..9f
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // a0..af
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // b0..bf
    8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0..cf
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // d0..df
   10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, // e0..ef
   11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // f0..ff

   // (state, character class) -> next state
    0, 1, 2, 3, 5, 8, 7, 1, 1, 1, 4, 6, 1, 1, 1, 1, // s0
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // s1
    1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, // s2
    1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, // s3
    1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, // s4
    1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, // s5
    1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, // s6
    1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, // s7
    1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1  // s8
};
-
-
- int _nvx_utf8vld_validate_table (void* utf8vld, const uint8_t* data, size_t length) {
-
- utf8_validator_t* vld = (utf8_validator_t*) utf8vld;
-
- int state = vld->state;
-
- const uint8_t* end = data + length;
-
- while (data < end && state != 1) {
- state = UTF8VALIDATOR_DFA[256 + state * 16 + UTF8VALIDATOR_DFA[*data++]];
- }
-
- vld->state = state;
-
- if (state == 0) {
- // UTF8 is valid and ends on codepoint
- return 0;
- } else {
- if (state == 1) {
- // UTF8 is invalid
- return -1;
- } else {
- // UTF8 is valid, but does not end on codepoint (needs more data)
- return 1;
- }
- }
- }
-
-
// unrolled DFA from http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
//
// DFA_TRANSITION(state, octet) advances `state` by one input octet.
//
// state - modifiable int lvalue holding the DFA state:
//            0  accept (on a code point boundary)
//            1  reject (never left again)
//            2  one continuation octet (0x80..0xbf) pending
//            3  two continuation octets pending
//            4  after 0xe0 (next octet must be 0xa0..0xbf)
//            5  after 0xed (next octet must be 0x80..0x9f)
//            6  after 0xf0 (next octet must be 0x90..0xbf)
//            7  after 0xf1..0xf3 (next octet must be 0x80..0xbf)
//            8  after 0xf4 (next octet must be 0x80..0x8f)
//         any other value is left unchanged
// octet - the input octet as an integer (0..255); evaluated exactly once
//
// The body is wrapped in do { } while (0) so that an invocation followed by
// `;` is exactly one statement and can be used in an unbraced if/else.
//
#define DFA_TRANSITION(state, octet) \
   do { \
      const int dfa_octet_ = (octet); \
      switch (state) { \
      case 0: \
         if (dfa_octet_ >= 0x00 && dfa_octet_ <= 0x7f) { \
            /* ASCII: stay in state 0 */ \
         } else if (dfa_octet_ >= 0xc2 && dfa_octet_ <= 0xdf) { \
            (state) = 2; \
         } else if ((dfa_octet_ >= 0xe1 && dfa_octet_ <= 0xec) || dfa_octet_ == 0xee || dfa_octet_ == 0xef) { \
            (state) = 3; \
         } else if (dfa_octet_ == 0xe0) { \
            (state) = 4; \
         } else if (dfa_octet_ == 0xed) { \
            (state) = 5; \
         } else if (dfa_octet_ == 0xf4) { \
            (state) = 8; \
         } else if (dfa_octet_ == 0xf1 || dfa_octet_ == 0xf2 || dfa_octet_ == 0xf3) { \
            (state) = 7; \
         } else if (dfa_octet_ == 0xf0) { \
            (state) = 6; \
         } else { \
            /* stray continuation octet, 0xc0, 0xc1 or 0xf5..0xff */ \
            (state) = 1; \
         } \
         break; \
      case 2: \
         (state) = (dfa_octet_ >= 0x80 && dfa_octet_ <= 0xbf) ? 0 : 1; \
         break; \
      case 3: \
         (state) = (dfa_octet_ >= 0x80 && dfa_octet_ <= 0xbf) ? 2 : 1; \
         break; \
      case 4: \
         (state) = (dfa_octet_ >= 0xa0 && dfa_octet_ <= 0xbf) ? 2 : 1; \
         break; \
      case 5: \
         (state) = (dfa_octet_ >= 0x80 && dfa_octet_ <= 0x9f) ? 2 : 1; \
         break; \
      case 6: \
         (state) = (dfa_octet_ >= 0x90 && dfa_octet_ <= 0xbf) ? 3 : 1; \
         break; \
      case 7: \
         (state) = (dfa_octet_ >= 0x80 && dfa_octet_ <= 0xbf) ? 3 : 1; \
         break; \
      case 8: \
         (state) = (dfa_octet_ >= 0x80 && dfa_octet_ <= 0x8f) ? 3 : 1; \
         break; \
      default: \
         /* state 1 is absorbing; other values should not occur */ \
         break; \
      } \
   } while (0)
-
-
- int _nvx_utf8vld_validate_unrolled (void* utf8vld, const uint8_t* data, size_t length) {
-
- utf8_validator_t* vld = (utf8_validator_t*) utf8vld;
-
- int state = vld->state;
-
- const uint8_t* tail_end = data + length;
-
- while (data < tail_end && state != 1) {
-
- // get tail octet
- int octet = *data;
-
- // do the DFA
- DFA_TRANSITION(state, octet);
-
- ++data;
- }
-
- vld->state = state;
-
- if (state == 0) {
- // UTF8 is valid and ends on codepoint
- return 0;
- } else {
- if (state == 1) {
- // UTF8 is invalid
- return -1;
- } else {
- // UTF8 is valid, but does not end on codepoint (needs more data)
- return 1;
- }
- }
- }
-
-
- /*
- __m128i _mm_load_si128 (__m128i const* mem_addr)
- #include "emmintrin.h"
- Instruction: movdqa
- CPUID Feature Flag: SSE2
-
- int _mm_movemask_epi8 (__m128i a)
- #include "emmintrin.h"
- Instruction: pmovmskb
- CPUID Feature Flag: SSE2
-
- __m128i _mm_srli_si128 (__m128i a, int imm)
- #include "emmintrin.h"
- Instruction: psrldq
- CPUID Feature Flag: SSE2
-
- int _mm_cvtsi128_si32 (__m128i a)
- #include "emmintrin.h"
- Instruction: movd
- CPUID Feature Flag: SSE2
-
- int _mm_extract_epi16 (__m128i a, int imm)
- #include "emmintrin.h"
- Instruction: pextrw
- CPUID Feature Flag: SSE2
-
- int _mm_extract_epi8 (__m128i a, const int imm)
- #include "smmintrin.h"
- Instruction: pextrb
- CPUID Feature Flag: SSE4.1
- */
-
- #ifdef __SSE2__
- int _nvx_utf8vld_validate_sse2 (void* utf8vld, const uint8_t* data, size_t length) {
-
- utf8_validator_t* vld = (utf8_validator_t*) utf8vld;
-
- int state = vld->state;
-
- const uint8_t* tail_end = data + length;
-
- // process unaligned head (sub 16 octets)
- //
- size_t head_len = ((size_t) data) % sizeof(__m128i);
- if (head_len) {
-
- const uint8_t* head_end = data + head_len;
-
- while (data < head_end && state != UTF8_REJECT) {
-
- // get head octet
- int octet = *data;
-
- // do the DFA
- DFA_TRANSITION(state, octet);
-
- ++data;
- }
- }
-
- // process aligned middle (16 octet chunks)
- //
- const __m128i* ptr = ((const __m128i*) data);
- const __m128i* end = ((const __m128i*) data) + ((length - head_len) / sizeof(__m128i));
-
- while (ptr < end && state != UTF8_REJECT) {
-
- __builtin_prefetch(ptr + 1, 0, 3);
- //__builtin_prefetch(ptr + 4, 0, 3); // 16*4=64: cache-line prefetch
-
- __m128i xmm1 = _mm_load_si128(ptr);
-
- if (__builtin_expect(state || _mm_movemask_epi8(xmm1), 0)) {
-
- // copy to different reg - this allows the prefetching to
- // do its job in the meantime (I guess ..)
-
- // SSE2 variant
- //
- int octet;
-
- // octet 0
- octet = 0xff & _mm_cvtsi128_si32(xmm1);
- DFA_TRANSITION(state, octet);
-
- // octet 1
- xmm1 = _mm_srli_si128(xmm1, 1);
- octet = 0xff & _mm_cvtsi128_si32(xmm1);
- DFA_TRANSITION(state, octet);
-
- // octet 2
- xmm1 = _mm_srli_si128(xmm1, 1);
- octet = 0xff & _mm_cvtsi128_si32(xmm1);
- DFA_TRANSITION(state, octet);
-
- // octet 3
- xmm1 = _mm_srli_si128(xmm1, 1);
- octet = 0xff & _mm_cvtsi128_si32(xmm1);
- DFA_TRANSITION(state, octet);
-
- // octet 4
- xmm1 = _mm_srli_si128(xmm1, 1);
- octet = 0xff & _mm_cvtsi128_si32(xmm1);
- DFA_TRANSITION(state, octet);
-
- // octet 5
- xmm1 = _mm_srli_si128(xmm1, 1);
- octet = 0xff & _mm_cvtsi128_si32(xmm1);
- DFA_TRANSITION(state, octet);
-
- // octet 6
- xmm1 = _mm_srli_si128(xmm1, 1);
- octet = 0xff & _mm_cvtsi128_si32(xmm1);
- DFA_TRANSITION(state, octet);
-
- // octet 7
- xmm1 = _mm_srli_si128(xmm1, 1);
- octet = 0xff & _mm_cvtsi128_si32(xmm1);
- DFA_TRANSITION(state, octet);
-
- // octet 8
- xmm1 = _mm_srli_si128(xmm1, 1);
- octet = 0xff & _mm_cvtsi128_si32(xmm1);
- DFA_TRANSITION(state, octet);
-
- // octet 9
- xmm1 = _mm_srli_si128(xmm1, 1);
- octet = 0xff & _mm_cvtsi128_si32(xmm1);
- DFA_TRANSITION(state, octet);
-
- // octet 10
- xmm1 = _mm_srli_si128(xmm1, 1);
- octet = 0xff & _mm_cvtsi128_si32(xmm1);
- DFA_TRANSITION(state, octet);
-
- // octet 11
- xmm1 = _mm_srli_si128(xmm1, 1);
- octet = 0xff & _mm_cvtsi128_si32(xmm1);
- DFA_TRANSITION(state, octet);
-
- // octet 12
- xmm1 = _mm_srli_si128(xmm1, 1);
- octet = 0xff & _mm_cvtsi128_si32(xmm1);
- DFA_TRANSITION(state, octet);
-
- // octet 13
- xmm1 = _mm_srli_si128(xmm1, 1);
- octet = 0xff & _mm_cvtsi128_si32(xmm1);
- DFA_TRANSITION(state, octet);
-
- // octet 14
- xmm1 = _mm_srli_si128(xmm1, 1);
- octet = 0xff & _mm_cvtsi128_si32(xmm1);
- DFA_TRANSITION(state, octet);
-
- // octet 15
- xmm1 = _mm_srli_si128(xmm1, 1);
- octet = 0xff & _mm_cvtsi128_si32(xmm1);
- DFA_TRANSITION(state, octet);
- }
- ++ptr;
- }
-
- // process unaligned tail (sub 16 octets)
- //
- const uint8_t* tail_ptr = (const uint8_t*) ptr;
-
- while (tail_ptr < tail_end && state != UTF8_REJECT) {
-
- // get tail octet
- int octet = *tail_ptr;
-
- // do the DFA
- DFA_TRANSITION(state, octet);
-
- ++tail_ptr;
- }
-
- vld->state = state;
-
- if (state == UTF8_ACCEPT) {
- // UTF8 is valid and ends on codepoint
- return 0;
- } else {
- if (state == UTF8_REJECT) {
- // UTF8 is invalid
- return -1;
- } else {
- // UTF8 is valid, but does not end on codepoint (needs more data)
- return 1;
- }
- }
- }
- #endif
-
-
#ifdef __SSE4_1__

// Feed octet number `idx` (a compile-time constant 0..15, as required by
// _mm_extract_epi8) of the 128-bit register `chunk` into the DFA.
//
#define NVX_UTF8VLD_SSE41_STEP(chunk, idx, state) \
   do { \
      int nvx_octet_ = _mm_extract_epi8((chunk), (idx)); \
      DFA_TRANSITION(state, nvx_octet_); \
   } while (0)

// Validate the next `length` octets of a UTF-8 stream with the unrolled DFA,
// skipping aligned 16-octet chunks that contain only ASCII while the DFA is
// in the accept state (SSE4.1 variant). The DFA state is resumed from, and
// written back to, the validator.
//
// utf8vld - pointer to a utf8_validator_t
// data    - octets to validate; may start at any alignment
// length  - number of octets at `data`
//
// Returns 0 if the stream is valid so far and ends on a code point,
// 1 if it is valid so far but ends inside a code point (needs more data),
// -1 if it is invalid.
//
int _nvx_utf8vld_validate_sse4 (void* utf8vld, const uint8_t* data, size_t length) {

   utf8_validator_t* vld = (utf8_validator_t*) utf8vld;

   int state = vld->state;

   const uint8_t* cursor = data;
   const uint8_t* const tail_end = data + length;

   // process unaligned head (sub 16 octets)
   //
   // The head is the run of octets in front of the next 16-octet boundary
   // (not the misalignment itself), and never longer than the input.
   // _mm_load_si128 below requires a 16-octet aligned address.
   //
   const size_t misalignment = (size_t) (((uintptr_t) data) % sizeof(__m128i));
   size_t head_len = misalignment ? sizeof(__m128i) - misalignment : 0;
   if (head_len > length) {
      head_len = length;
   }
   const uint8_t* const head_end = data + head_len;

   while (cursor < head_end && state != UTF8_REJECT) {

      // get head octet
      int octet = *cursor;

      // do the DFA
      DFA_TRANSITION(state, octet);

      ++cursor;
   }

   // process aligned middle (16 octet chunks)
   //
   // If the head loop stopped early the state is UTF8_REJECT, so no load is
   // ever issued from a misaligned cursor.
   //
   size_t chunks = (length - head_len) / sizeof(__m128i);

   while (chunks > 0 && state != UTF8_REJECT) {

      __builtin_prefetch(cursor + sizeof(__m128i), 0, 3);

      __m128i xmm1 = _mm_load_si128((const __m128i*) cursor);

      // Fast path: in the accept state a chunk without any high bit set is
      // pure ASCII and cannot change the state, so it is skipped.
      if (__builtin_expect(state || _mm_movemask_epi8(xmm1), 0)) {

         // SSE4.1 variant: extract each octet directly from the register
         NVX_UTF8VLD_SSE41_STEP(xmm1, 0, state);
         NVX_UTF8VLD_SSE41_STEP(xmm1, 1, state);
         NVX_UTF8VLD_SSE41_STEP(xmm1, 2, state);
         NVX_UTF8VLD_SSE41_STEP(xmm1, 3, state);
         NVX_UTF8VLD_SSE41_STEP(xmm1, 4, state);
         NVX_UTF8VLD_SSE41_STEP(xmm1, 5, state);
         NVX_UTF8VLD_SSE41_STEP(xmm1, 6, state);
         NVX_UTF8VLD_SSE41_STEP(xmm1, 7, state);
         NVX_UTF8VLD_SSE41_STEP(xmm1, 8, state);
         NVX_UTF8VLD_SSE41_STEP(xmm1, 9, state);
         NVX_UTF8VLD_SSE41_STEP(xmm1, 10, state);
         NVX_UTF8VLD_SSE41_STEP(xmm1, 11, state);
         NVX_UTF8VLD_SSE41_STEP(xmm1, 12, state);
         NVX_UTF8VLD_SSE41_STEP(xmm1, 13, state);
         NVX_UTF8VLD_SSE41_STEP(xmm1, 14, state);
         NVX_UTF8VLD_SSE41_STEP(xmm1, 15, state);
      }

      cursor += sizeof(__m128i);
      --chunks;
   }

   // process unaligned tail (sub 16 octets)
   //
   while (cursor < tail_end && state != UTF8_REJECT) {

      // get tail octet
      int octet = *cursor;

      // do the DFA
      DFA_TRANSITION(state, octet);

      ++cursor;
   }

   vld->state = state;

   if (state == UTF8_ACCEPT) {
      // UTF8 is valid and ends on codepoint
      return 0;
   }
   if (state == UTF8_REJECT) {
      // UTF8 is invalid
      return -1;
   }
   // UTF8 is valid, but does not end on codepoint (needs more data)
   return 1;
}

#undef NVX_UTF8VLD_SSE41_STEP

#endif
-
-
- int nvx_utf8vld_validate (void* utf8vld, const uint8_t* data, size_t length) {
-
- utf8_validator_t* vld = (utf8_validator_t*) utf8vld;
-
- switch (vld->impl) {
- case UTF8_VALIDATOR_TABLE_DFA:
- return _nvx_utf8vld_validate_table(utf8vld, data, length);
- case UTF8_VALIDATOR_UNROLLED_DFA:
- return _nvx_utf8vld_validate_unrolled(utf8vld, data, length);
- #ifdef __SSE2__
- case UTF8_VALIDATOR_SSE2_DFA:
- return _nvx_utf8vld_validate_table(utf8vld, data, length);
- #endif
- #ifdef __SSE4_1__
- case UTF8_VALIDATOR_SSE41_DFA:
- return _nvx_utf8vld_validate_table(utf8vld, data, length);
- #endif
- default:
- return _nvx_utf8vld_validate_table(utf8vld, data, length);
- }
- }
|