jaulib v1.3.0
Jau Support Library (C++, Java, ..)
dfa_utf8_decode.hpp
Go to the documentation of this file.
1/*
2 * Author: Sven Gothel <sgothel@jausoft.com>
3 * Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de> (see details below)
4 * Copyright (c) 2020 Gothel Software e.K.
5 * Copyright (c) 2020 ZAFENA AB
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be
16 * included in all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27#ifndef JAU_DFA_UTF8_DECODE_HPP_
28#define JAU_DFA_UTF8_DECODE_HPP_
29
30#define DFA_UTF8_ACCEPT 0 /* decoder state value; presumably "sequence complete / ready for next character", judging by the name - TODO confirm against the implementation */
31#define DFA_UTF8_REJECT 12 /* decoder state value; presumably "invalid UTF-8 input detected", judging by the name - TODO confirm against the implementation */
32
33#include <string>
34#include <cstdint>
35#include <cinttypes>
36
37namespace jau {
38 /**
39 * \ingroup ByteUtils
40 * Single-byte decoding step; only the declaration is visible here, so the notes marked "presumably" are assumptions to confirm against the implementation.
41 * @param state in/out decoder state, passed by non-const reference; presumably compared against DFA_UTF8_ACCEPT and DFA_UTF8_REJECT by callers - TODO confirm
42 * @param codep in/out code point value, passed by non-const reference; presumably holds the decoded code point once the state is DFA_UTF8_ACCEPT - TODO confirm
43 * @param byte_value the next input byte, passed as a uint32_t
44 * @return presumably the updated state, as in Bjoern Hoehrmann's reference decoder - TODO confirm
45 */
46 uint32_t dfa_utf8_decode(uint32_t & state, uint32_t & codep, const uint32_t byte_value);
47
48 /**
49 * \ingroup ByteUtils
50 *
51 * Returns all valid consecutive UTF-8 characters within buffer, in the range up to buffer_size or until EOS.
52 * <p>
53 * If a non-UTF-8 character is detected, the content is cut off at that point and the decoding loop ends.
54 * </p>
55 * <p>
56 * The method uses a finite state machine to detect variable-length UTF-8 codes. See Bjoern Hoehrmann's site <http://bjoern.hoehrmann.de/utf-8/decoder/dfa/> for details.
57 * </p>
58 * @param buffer pointer to the bytes to decode
59 * @param buffer_size upper bound of the range within buffer that is examined
60 * @return a std::string holding the valid consecutive UTF-8 characters found
61 */
62 std::string dfa_utf8_decode(const uint8_t *buffer, const size_t buffer_size);
63} /* namespace jau */
64
65#endif /* JAU_DFA_UTF8_DECODE_HPP_ */
uint32_t dfa_utf8_decode(uint32_t &state, uint32_t &codep, const uint32_t byte_value)
__pack(...): Produces MSVC, clang and gcc compatible lead-in and -out macros.
Definition: backtrace.hpp:32