36#ifndef JAU_TOKEN_FSM_HPP_ 
   37#define JAU_TOKEN_FSM_HPP_ 
   48#ifdef JAU_DO_JAU_TRACE_PRINT 
   49    #define JAU_TRACE_PRINT(...) fprintf(stderr, __VA_ARGS__); 
   51    #define JAU_TRACE_PRINT(...) 
   95            : name_( std::move(_name) ), base_(_base), cpf(_cpf) {}
 
   98            constexpr const std::string& 
name() const noexcept { 
return name_; }
 
  107                std::string res(
"alphabet[");
 
  116        return lhs.base() != rhs.base() || lhs.name() != rhs.name();
 
  120        return !( lhs != rhs );
 
  135            static code_point_t s_code_point(
const char c) 
noexcept {
 
  136                if( 
' ' <= c && c <= 
'~' ) {
 
  145            : 
alphabet("ascii95", 95, s_code_point) {}
 
  160            static code_point_t s_code_point(
const char c) 
noexcept {
 
  161                if( 
' ' <= c && c < 
'a' ) { 
 
  163                } 
else if( 
'a' <= c && c <= 
'z' ) { 
 
  164                    return c - 
'a' + 
'A' - 
' ';
 
  165                } 
else if( 
'{' <= c && c <= 
'~' ) {
 
  166                    return c - 
'{' + 
'a' - 
' '; 
 
  174            : 
alphabet("ascii69", 69, s_code_point) {}
 
  189            static code_point_t s_code_point(
const char c) 
noexcept {
 
  190                if( 
'A' <= c && c < 
'Z' ) { 
 
  192                } 
else if( 
'a' <= c && c <= 
'z' ) { 
 
  201            : 
alphabet("ascii26", 26, s_code_point) {}
 
  212    template<
typename State_type,
 
  213             std::enable_if_t<std::is_integral_v<State_type> &&
 
  214                              std::is_unsigned_v<State_type> &&
 
  221            typedef State_type uint_t;
 
  231            struct token_value_t {
 
  236                std::string_view value;
 
  239                    return "[ts "+
std::to_string(name)+
", value "+std::string(value)+
"]";
 
  261            token_fsm ( 
const token_fsm& src ) 
noexcept = 
default;
 
  262            token_fsm ( token_fsm&& src ) 
noexcept = 
default;
 
  263            token_fsm& operator=(
const token_fsm& x) 
noexcept = 
default;
 
  264            token_fsm& operator=(token_fsm&& x) 
noexcept = 
default;
 
  266            uint_t state_count() const noexcept { 
return m_next_state-1; }
 
  267            uint_t next_state() const noexcept { 
return m_next_state; }
 
  269            bool empty() const noexcept { 
return 0 == state_count(); }
 
  272            bool contains(uint_t token_name) 
const noexcept {
 
  273                return m_token_names.cend() != 
std::find(m_token_names.cbegin(), m_token_names.cend(), token_name);
 
  277            size_t count() const noexcept { 
return m_token_names.size(); }
 
  280            bool is_separator(
const char c) 
const noexcept {
 
  281                return m_separators.cend() != 
std::find(m_separators.cbegin(), m_separators.cend(), c);
 
  287            void grow(
const uint_t required_sz) {
 
  288                while( m_matrix.size() < required_sz ) {
 
  289                    uint_t line[m_row_len];
 
  290                    ::bzero(line, 
sizeof(line));
 
  291                    m_matrix.push_back(&line[0], &line[m_row_len]);
 
  298            std::string m_separators;
 
  302            std::vector<uint_t> m_token_names;
 
  309            void clear() noexcept {
 
  312                m_token_names.clear();
 
  321            token_fsm (alphabet alphabet, 
const std::string_view separators = 
"\040\011\012\015")
 
  322            : m_alphabet( 
std::
move(alphabet) ),
 
  323              m_row_len(m_alphabet.base()), m_end(m_row_len-1),
 
  324              m_separators(separators),
 
  325              m_matrix(), m_next_state(1), m_token_names()
 
  345            token_fsm ( 
const alphabet& alphabet, 
const std::vector<token_value_t>& key_words, 
const std::string_view separators = 
"\040\011\012\015")
 
  346            : token_fsm(alphabet, separators)
 
  350                for( 
size_t word_num=0;
 
  351                     word_num < key_words.size() && m_next_state < max_state;
 
  355                    if( !add( key_words[word_num] ) ) {
 
  376            bool add(
const token_value_t& tkey_word) 
noexcept {
 
  377                if( 0 == tkey_word.name || token_error == tkey_word.name ) {
 
  385                const std::string_view& key_word = tkey_word.value;
 
  386                uint_t current_state = 0;
 
  388                uint_t c = token_error;
 
  389                JAU_TRACE_PRINT(
"token_fsm::add: %s:\n", tkey_word.to_string().c_str());
 
  392                uint_t  next_state = m_next_state;
 
  395                     char_num < key_word.size() &&
 
  396                     next_state < max_state;
 
  400                    c = key_word[char_num];
 
  403                    if( is_separator( c ) ) {
 
  415                    const uint_t current_idx = m_row_len*current_state+c;
 
  417                    JAU_TRACE_PRINT(
"c-off %zu, state %zu, idx %zu] ", (
size_t)c, (
size_t)current_state, (
size_t)current_idx);
 
  419                    const uint_t current_token = m_matrix[current_idx];
 
  420                    if( !current_token ) {
 
  421                        m_matrix[current_idx] = next_state;
 
  422                        current_state = next_state++;
 
  425                        current_state = current_token;
 
  430                if( char_num > 0 && c != token_error ) {
 
  432                    const uint_t current_idx = m_row_len*current_state+m_end;
 
  435                    m_matrix[current_idx] = tkey_word.name;
 
  436                    m_token_names.push_back( tkey_word.name );
 
  437                    JAU_TRACE_PRINT(
"   -> terminal [c-off %zu, state %zu, idx %zu] = %zu\n", (
size_t)m_end, (
size_t)current_state, (
size_t)current_idx, (
size_t)tkey_word.name);
 
  445                if( next_state >= max_state ) {
 
  450                    m_next_state = next_state;
 
  467            result_t 
find(
const std::string_view& haystack, 
int start=0) noexcept {
 
  468                if( 0 == m_matrix.size() ) {
 
  469                    return token_fsm::result_t { .token_name = token_error, .source_begin = 0, .source_last = 0 };
 
  475                uint_t current_state = 0;
 
  477                while( i < haystack.size() && !current_state ) {
 
  479                    if( is_separator(haystack[i2-1]) || i2==0 ) {
 
  481                            if( i2 == haystack.size() ) {
 
  484                            } 
else if( is_separator( c = haystack[i2++] ) ) {
 
  497                            const uint_t current_idx = m_row_len*current_state+c;
 
  498                            if( current_idx >= m_matrix.size() ) {
 
  502                            current_state = m_matrix[current_idx];
 
  503                        } 
while( current_state && c != m_end );
 
  507                if( c == m_end && current_state ) {
 
  508                    return token_fsm::result_t { .token_name = current_state, .source_begin = i - 1, .source_last = i2 };
 
  510                    return token_fsm::result_t { .token_name = token_error, .source_begin = 0, .source_last = 0 };
 
  522            uint_t 
get(
const std::string_view& word) 
noexcept {
 
  523                if( 0 == m_matrix.size() ) {
 
  529                uint_t current_state = 0;
 
  532                    if( i2 == word.size() ) {
 
  545                    const uint_t current_idx = m_row_len*current_state+c;
 
  546                    JAU_TRACE_PRINT(
"   [c-off %zu, state %zu, idx %zu] ", (
size_t)c, (
size_t)current_state, (
size_t)current_idx);
 
  547                    if( current_idx >= m_matrix.size() ) {
 
  552                    current_state = m_matrix[current_idx];
 
  554                } 
while( current_state && c != m_end );
 
  556                if( c == m_end && current_state ) {
 
  558                    return current_state;
 
  565            std::string fsm_to_string(
const int token_per_row) 
const noexcept {
 
  566                const uint_t sz = m_matrix.size();
 
  567                const uint_t rows = sz / m_row_len;
 
  569                std::string s = 
"token_fsm["+m_alphabet.to_string()+
", "+
std::to_string(count())+
" token, sz "+
 
  575                for(uint_t y=0; y<rows && idx<sz; ++y) {
 
  576                    snprintf(buf, 
sizeof(buf), 
"\n%3zu: ", (
size_t)y);
 
  578                    for(uint_t x=0; x<m_row_len && idx<sz; ++x, ++idx) {
 
  579                        const uint_t t = m_matrix[m_row_len*y+x];
 
  580                        snprintf(buf, 
sizeof(buf), 
"%3zu, ", (
size_t)t);
 
  582                        if( x < m_row_len-1 && ( x + 1 ) % token_per_row == 0 ) {
 
  592                const uint_t sz = m_matrix.size();
 
  593                const uint_t rows = sz / m_row_len;
 
  594                return "token_fsm["+m_alphabet.to_string()+
", "+
std::to_string(count())+
" token, sz "+
 
Implementation of a dynamic linear array storage, aka vector.
Base Alphabet Specification providing the alphabet for token_fsm.
Case insensitive ASCII base 26 alphabet with ASCII code-point sorting order.
Case insensitive ASCII base 69 alphabet with ASCII code-point sorting order.
Full ASCII base 95 alphabet with ASCII code-point sorting order.
constexpr bool contains(InputArray &array, const T &value)
Return true if value is contained in array.
constexpr InputIt find(InputIt first, InputIt last, const T &value)
Like std::find() of 'algorithm'.
void clear() noexcept
Clears internal list.
Entry * get(const EUI48 &addr, const std::string &name, AddressNameEntryMatchFunc m) noexcept
Returns a matching BTSecurityRegistry::Entry with the given addr and/or name.
uint_fast32_t nsize_t
Natural 'size_t' alternative using uint_fast32_t as its natural sized type.
constexpr T max(const T x, const T y) noexcept
Returns the maximum of two integrals (w/ branching) in O(1)
uint16_t code_point_t
Unsigned int symbol for alphabet code-point type.
ascii95_alphabet() noexcept
bool operator==(const alphabet &lhs, const alphabet &rhs) noexcept
constexpr const std::string & name() const noexcept
Human readable name for this alphabet instance.
bool operator!=(const alphabet &lhs, const alphabet &rhs) noexcept
code_point_t(* code_point_func)(const char c) noexcept
std::string to_string() const noexcept
constexpr code_point_t base() const noexcept
The fixed base used for this alphabet, i.e.
ascii26_alphabet() noexcept
constexpr code_point_t code_point(const char c) const noexcept
Returns the token of the given character or code_error if not element of this alphabet.
static constexpr const code_point_t code_error
token_error value, denoting an invalid alphabet code-point.
ascii69_alphabet() noexcept
alphabet(std::string _name, code_point_t _base, code_point_func _cpf) noexcept
std::string to_string(const alphabet &v) noexcept
#define JAU_TRACE_PRINT(...)