jaulib v1.3.6
Jau Support Library (C++, Java, ..)
Loading...
Searching...
No Matches
token_fsm.hpp
Go to the documentation of this file.
1/*
2 * Author: Sven Gothel <sgothel@jausoft.com>
3 * Copyright (c) 1992-2022 Gothel Software e.K.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be
14 * included in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Original header of salvaged code from July 1992
25 *
26 * TOKEN_A.cpp --- Einfacher TOKEN-AUTOMAT fuer STRINGS 5. Juli 1993
27 * V1.0
28 *
29 * Sven Goethel - Stapenhorststr. 35a 4800 Bielefeld 1
30 *
31 *
32 * *************************************************************************
33 * * TOKEN AUTOMAT .... written 29.07.1992 by Sven Göthel *
34 * *************************************************************************
35 */
36#ifndef JAU_TOKEN_FSM_HPP_
37#define JAU_TOKEN_FSM_HPP_
38
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <limits>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>
#include <vector>

#include <jau/basic_algos.hpp>
#include <jau/darray.hpp>
#include <jau/int_types.hpp>
#include <jau/secmem.hpp>
47
// Uncomment to enable printf-style trace output to stderr.
// #define JAU_DO_JAU_TRACE_PRINT 1
#ifdef JAU_DO_JAU_TRACE_PRINT
    // Expands to a single expression without trailing `;`, the call site supplies it.
    // This keeps `if( x ) JAU_TRACE_PRINT(...); else ...` well-formed.
    #define JAU_TRACE_PRINT(...) fprintf(stderr, __VA_ARGS__)
#else
    // Tracing disabled: expands to nothing, arguments are not evaluated.
    #define JAU_TRACE_PRINT(...)
#endif
54
55namespace jau::lang {
56 /** @defgroup Lang Languages
57 * Language functionality, programming and otherwise
58 *
59 * Supported
60 * - jau::lang::token_fsm A lexical analyzer (tokenizer) using a tabular finite-state-machine (FSM), aka `endlicher automat` (EA)
61 *
62 * For serious applications w/ regular expressions and more, as well as a `lex` C++ alternative to `flex`,
63 * consider using [Re-flex](https://github.com/Genivia/RE-flex).
64 *
65 * @{
66 */
67
/**
 * Base Alphabet Specification providing the alphabet for token_fsm.
 *
 * The character to code-point mapping is delegated to the
 * code_point_func handed to the constructor.
 *
 * @see token_fsm()
 */
class alphabet {
    public:
        /**
         * Unsigned int symbol for alphabet code-point type
         */
        typedef uint16_t code_point_t;

        /**
         * token_error value, denoting an invalid alphabet code-point.
         */
        static inline constexpr const code_point_t code_error = std::numeric_limits<code_point_t>::max();

        /** Function type mapping a character to its code-point, see code_point(). */
        typedef code_point_t (*code_point_func)(const char c) noexcept;

    private:
        std::string name_;
        code_point_t base_;
        code_point_func cpf; // mapping function invoked by code_point()

    public:
        /**
         * @param _name human readable name of this alphabet
         * @param _base the fixed base, see base()
         * @param _cpf the character to code-point mapping function, see code_point()
         */
        alphabet(std::string _name, code_point_t _base, code_point_func _cpf) noexcept
        : name_( std::move(_name) ), base_(_base), cpf(_cpf) {}

        /** Human readable name for this alphabet instance. */
        constexpr const std::string& name() const noexcept { return name_; }

        /** The fixed base used for this alphabet, i.e. number of token. */
        constexpr code_point_t base() const noexcept { return base_; }

        /** Returns the token of the given character or code_error if not element of this alphabet. */
        constexpr code_point_t code_point(const char c) const noexcept { return cpf(c); }

        /** Returns `alphabet[<name>, base <base>]`. */
        std::string to_string() const noexcept {
            std::string res("alphabet[");
            res.append(name());
            res.append(", base "+std::to_string(base())+"]");
            return res;
        }
};
114 inline std::string to_string(const alphabet& v) noexcept { return v.to_string(); }
115
116 inline bool operator!=(const alphabet& lhs, const alphabet& rhs ) noexcept {
117 return lhs.base() != rhs.base() || lhs.name() != rhs.name();
118 }
119
120 inline bool operator==(const alphabet& lhs, const alphabet& rhs ) noexcept {
121 return !( lhs != rhs );
122 }
123
124 /**
125 * Full ASCII base 95 alphabet with ASCII code-point sorting order.
126 *
127 * ### Properties
128 * - Base 95, i.e. full visible ASCII [32 .. 126]
129 * - 7-bit ASCII
130 * - Code page 437 compatible
131 * - Supporting ASCII code-point sorting.
132 * - Order: ` ` < `0` < `:` < `A` < `[` < `a` < `{` < `~`
133 */
134 class ascii95_alphabet : public alphabet {
135 private:
136 static code_point_t s_code_point(const char c) noexcept {
137 if( ' ' <= c && c <= '~' ) {
138 return c - ' ';
139 } else {
140 return code_error;
141 }
142 }
143
144 public:
146 : alphabet("ascii95", 95, s_code_point) {}
147 };
148
149 /**
150 * Case insensitive ASCII base 69 alphabet with ASCII code-point sorting order.
151 *
152 * ### Properties
153 * - Base 69, i.e. ASCII [32 .. 96] + [123 .. 126], merging lower- and capital-letters
154 * - 7-bit ASCII
155 * - Code page 437 compatible
156 * - Supporting ASCII code-point sorting.
157 * - Order: ` ` < `0` < `:` < `A` < `[` < `{` < `~`
158 */
159 class ascii69_alphabet : public alphabet {
160 private:
161 static code_point_t s_code_point(const char c) noexcept {
162 if( ' ' <= c && c < 'a' ) { // [ 0 .. 64 ]
163 return c - ' ';
164 } else if( 'a' <= c && c <= 'z' ) { // [ 33 .. 58 ]
165 return c - 'a' + 'A' - ' ';
166 } else if( '{' <= c && c <= '~' ) {
167 return c - '{' + 'a' - ' '; // [ 65 .. 68 ]
168 } else {
169 return code_error;
170 }
171 }
172
173 public:
175 : alphabet("ascii69", 69, s_code_point) {}
176 };
177
178 /**
179 * Case insensitive ASCII base 26 alphabet with ASCII code-point sorting order.
180 *
181 * ### Properties
182 * - Base 26, i.e. ASCII [65 .. 90], merging lower- and capital-letters
183 * - 7-bit ASCII
184 * - Code page 437 compatible
185 * - Supporting ASCII code-point sorting.
186 * - Order: `A` < `Z`
187 */
188 class ascii26_alphabet : public alphabet {
189 private:
190 static code_point_t s_code_point(const char c) noexcept {
191 if( 'A' <= c && c < 'Z' ) { // [ 0 .. 25 ]
192 return c - 'A';
193 } else if( 'a' <= c && c <= 'z' ) { // [ 0 .. 25 ]
194 return c - 'a';
195 } else {
196 return code_error;
197 }
198 }
199
200 public:
202 : alphabet("ascii26", 26, s_code_point) {}
203 };
204
205 /**
206 * A lexical analyzer (tokenizer) using a tabular finite-state-machine (FSM), aka `endlicher automat` (EA).
207 *
208 * Implemented initially by Sven Gothel in July 1992 using early C++ with and brought to a clean C++17 template.
209 *
210 * @tparam State_type used for token name and internal FSM, hence memory sensitive.
211 * Must be an unsigned integral type with minimum size of sizeof(alphabet::code_point_t), i.e. uint16_t.
212 */
213 template<typename State_type,
214 std::enable_if_t<std::is_integral_v<State_type> &&
215 std::is_unsigned_v<State_type> &&
216 sizeof(alphabet::code_point_t) <= sizeof(State_type), bool> = true>
217 class token_fsm {
218 public:
219 /**
220 * Unsigned int symbol for token-value type
221 */
222 typedef State_type uint_t;
223
224 /**
225 * token_error value, denoting an invalid token or alphabet code-point.
226 */
227 constexpr static const uint_t token_error = std::numeric_limits<uint_t>::max();
228
229 constexpr static uint_t to_symbol(char c) noexcept { return static_cast<unsigned char>(c); }
230
231 /**
232 * Terminal token name and ASCII string value pair, provided by user.
233 */
235 /** Token numerical name, a terminal symbol. Value must be greater than zero and not equal to token_error. */
237
238 /** Token ASCII string value to be tokenized. */
239 std::string_view value;
240
241 std::string to_string() const noexcept {
242 return "[ts "+std::to_string(name)+", value "+std::string(value)+"]";
243 }
244 };
245
246 /**
247 * Result type for token_fsm::find()
248 */
249 struct result_t {
250 /** Token numerical name (terminal symbol) if found, otherwise token_error */
252
253 /** Position of first char of token in source */
255
256 /** Last position in source after token. */
258
259 std::string to_string() const noexcept {
260 return "[ts "+std::to_string(token_name)+", pos["+std::to_string(source_begin)+".."+std::to_string(source_last)+")]";
261 }
262 };
263
264 token_fsm ( const token_fsm& src ) noexcept = default;
265 token_fsm ( token_fsm&& src ) noexcept = default;
266 token_fsm& operator=(const token_fsm& x) noexcept = default;
267 token_fsm& operator=(token_fsm&& x) noexcept = default;
268
269 uint_t state_count() const noexcept { return m_next_state-1; }
270 uint_t next_state() const noexcept { return m_next_state; }
271
272 bool empty() const noexcept { return 0 == state_count(); }
273
274 /** Returns true if this FSM containes the given token name */
275 bool contains(uint_t token_name) const noexcept {
276 return m_token_names.cend() != std::find(m_token_names.cbegin(), m_token_names.cend(), token_name);
277 }
278
279 /** Returns the number of contained token. */
280 size_t count() const noexcept { return m_token_names.size(); }
281
282 /** Returns true if the given char is listed as a separator. */
283 bool is_separator(const char c) const noexcept {
284 return m_separators.cend() != std::find(m_separators.cbegin(), m_separators.cend(), c);
285 }
286
287 private:
288 typedef jau::darray<uint_t, jau::nsize_t> darray_t;
289
290 void grow(const uint_t required_sz) {
291 m_matrix.reserve( required_sz + 100 );
292 while( m_matrix.size() < required_sz ) {
293 m_matrix.resize(m_matrix.size() + m_row_len, 0);
294 }
295 }
296
297 alphabet m_alphabet;
298 uint_t m_row_len;
299 uint_t m_end;
300 std::string m_separators;
301
302 darray_t m_matrix;
303 uint_t m_next_state;
304 darray_t m_token_names;
305
306 public:
307
308 /**
309 * Clears the FSM. Afterwards, the FSM can be filled over again from scratch.
310 */
311 void clear() noexcept {
312 m_matrix.clear(true);
313 m_next_state = 1;
314 m_token_names.clear(true);
315 }
316
317 /**
318 * Constructs an empty instance.
319 * @param alphabet the used alphabet
320 * @param separators separator, defaults to SPACE, TAB, LF, CR
321 * @see add()
322 */
323 token_fsm (alphabet alphabet, const std::string_view separators = "\040\011\012\015")
324 : m_alphabet( std::move(alphabet) ),
325 m_row_len(m_alphabet.base()), m_end(m_row_len-1),
326 m_separators(separators),
327 m_matrix(), m_next_state(1), m_token_names()
328 { }
329
330 /**
331 * Constructs a new instance w/ given token_value_t name and value pairs.
332 *
333 * In case of an error, method will clear() and abort, user might validated via empty().
334 *
335 * Reasons for failures could be
336 * - invalid token name, e.g. 0
337 * - duplicate token name in input key_words
338 * - invalid token value
339 * - empty string
340 * - invalid character according to given alphabet or a separator
341 *
342 * @param alphabet the used alphabet
343 * @param key_words vector of to be added token_value_t name and values
344 * @param separators separator, defaults to SPACE, TAB, LF, CR
345 * @see add()
346 */
347 token_fsm ( const alphabet& alphabet, const std::vector<token_value_t>& key_words, const std::string_view separators = "\040\011\012\015")
348 : token_fsm(alphabet, separators)
349 {
350 const uint_t max_state = (uint_t) std::numeric_limits<uint_t>::max();
351
352 for( size_t word_num=0;
353 word_num < key_words.size() && m_next_state < max_state;
354 word_num++
355 )
356 {
357 if( !add( key_words[word_num] ) ) {
358 return;
359 }
360 }
361 }
362
363 /**
364 * Adds given token_value_t name and value pair.
365 *
366 * In case of an error, method will clear() and abort, user might validated via empty().
367 *
368 * Reasons for failures could be
369 * - invalid token name, e.g. 0 or token_error
370 * - duplicate token name in input key_words
371 * - invalid token value
372 * - empty string
373 * - invalid character according to given alphabet or a separator
374 *
375 * @param tkey_word the given token name and value pair
376 * @return true if successful, otherwise false
377 */
378 bool add(const token_value_t& tkey_word) {
379 if( 0 == tkey_word.name || token_error == tkey_word.name ) {
380 // invalid token name
381 return false;
382 }
383 if( contains( tkey_word.name ) ) {
384 // already contained -> ERROR
385 return false;
386 }
387 const std::string_view& key_word = tkey_word.value;
388 uint_t current_state = 0;
389 size_t char_num = 0;
391 JAU_TRACE_PRINT("token_fsm::add: %s:\n", tkey_word.to_string().c_str());
392
393 const uint_t max_state = (uint_t) std::numeric_limits<uint_t>::max();
394 uint_t next_state = m_next_state;
395
396 for( ;
397 char_num < key_word.size() &&
398 next_state < max_state;
399 ++char_num
400 )
401 {
402 c = to_symbol(key_word[char_num]);
403 JAU_TRACE_PRINT(" [%c, ", (char)c);
404
405 if( is_separator( c ) ) {
406 c = token_error;
407 break; // invalid character
408 }
409
410 const alphabet::code_point_t cp = m_alphabet.code_point(c);
411 if( alphabet::code_error == cp ) {
412 c = token_error;
413 break; // invalid character
414 } else {
415 c = cp;
416 }
417 const uint_t current_idx = m_row_len*current_state+c;
418 grow(current_idx+1);
419 JAU_TRACE_PRINT("c-off %zu, state %zu, idx %zu] ", (size_t)c, (size_t)current_state, (size_t)current_idx);
420
421 const uint_t current_token = m_matrix[current_idx];
422 if( !current_token ) {
423 m_matrix[current_idx] = next_state;
424 current_state = next_state++;
425 JAU_TRACE_PRINT("-> state %zu (new),\n", (size_t)current_state);
426 } else {
427 current_state = current_token;
428 JAU_TRACE_PRINT("-> state %zu (jmp),\n", (size_t)current_state);
429 }
430 }
431
432 if( char_num > 0 && c != token_error ) {
433 // token value exists (char_num) and is valid (c)
434 const uint_t current_idx = m_row_len*current_state+m_end;
435 grow(current_idx+1);
436
437 m_matrix[current_idx] = tkey_word.name;
438 m_token_names.push_back( tkey_word.name );
439 JAU_TRACE_PRINT(" -> terminal [c-off %zu, state %zu, idx %zu] = %zu\n", (size_t)m_end, (size_t)current_state, (size_t)current_idx, (size_t)tkey_word.name);
440 } else {
441 // abort on invalid char (c) or non-existing word.(char_nu,)
442 JAU_TRACE_PRINT(" -> error\n");
443 clear();
444 return false;
445 }
446
447 if( next_state >= max_state ) {
448 // FSM exceeded, abort
449 clear();
450 return false;
451 } else {
452 m_next_state = next_state;
453 return true;
454 }
455 }
456
457 /**
458 * Find a token within the given haystack, starting from given start position.
459 *
460 * This method reads over all characters until a token has been found or end-of-view.
461 *
462 * This method considers given separators.
463 *
464 * @param haystack string view to search for tokens
465 * @param start start position, allowing to reuse the view
466 * @return result_t denoting the found token, where result_t::token_name == token_error denotes not found.
467 * @see get()
468 */
469 result_t find(const std::string_view& haystack, int start=0) noexcept {
470 if( 0 == m_matrix.size() ) {
471 return token_fsm::result_t { .token_name = token_error, .source_begin = 0, .source_last = 0 };
472 }
473
474 /* Bis Zeilenende oder Gefundener Token durchsuchen */
475 uint_t c = 0;
476 jau::nsize_t i = start;
477 uint_t current_state = 0;
478 jau::nsize_t i2 = 0;
479 while( i < haystack.size() && !current_state ) {
480 i2=i++;
481 if( is_separator(haystack[i2-1]) || i2==0 ) {
482 do {
483 if( i2 == haystack.size() ) {
484 // position after token end
485 c = m_end;
486 } else if( is_separator( c = to_symbol( haystack[i2++] ) ) ) {
487 i2--; // position after token end
488 c = m_end;
489 } else {
490 const alphabet::code_point_t cp = m_alphabet.code_point(c);
491 if( alphabet::code_error == cp ) {
492 c = token_error;
493 current_state=0;
494 break; // invalid character
495 } else {
496 c = cp;
497 }
498 }
499 const uint_t current_idx = m_row_len*current_state+c;
500 if( current_idx >= m_matrix.size() ) {
501 /** end-of-matrix **/
502 break;
503 }
504 current_state = m_matrix[current_idx];
505 } while( current_state && c != m_end );
506 }
507 }
508
509 if( c == m_end && current_state ) {
510 return token_fsm::result_t { .token_name = current_state, .source_begin = i - 1, .source_last = i2 };
511 } else {
512 return token_fsm::result_t { .token_name = token_error, .source_begin = 0, .source_last = 0 };
513 }
514 }
515
516 /**
517 * Returns the token numerical name (terminal symbol) if found, otherwise token_error.
518 *
519 * This method does not consider given separators and expects given word to match a token 1:1.
520 *
521 * @param word the key word to lookup
522 * @see find()
523 */
524 uint_t get(const std::string_view& word) noexcept {
525 if( 0 == m_matrix.size() ) {
526 return 0;
527 }
528 JAU_TRACE_PRINT("token_fsm::get: %s:\n", std::string(word).c_str());
529
530 uint_t c = 0;
531 uint_t current_state = 0;
532 jau::nsize_t i2 = 0;
533 do {
534 if( i2 == word.size() ) {
535 c = m_end;
536 } else {
537 c = to_symbol( word[i2++] );
538 const alphabet::code_point_t cp = m_alphabet.code_point(c);
539 if( alphabet::code_error == cp ) {
540 c = token_error;
541 current_state=0;
542 break; // invalid character
543 } else {
544 c = cp;
545 }
546 }
547 const uint_t current_idx = m_row_len*current_state+c;
548 JAU_TRACE_PRINT(" [c-off %zu, state %zu, idx %zu] ", (size_t)c, (size_t)current_state, (size_t)current_idx);
549 if( current_idx >= m_matrix.size() ) {
550 /** end-of-matrix **/
551 JAU_TRACE_PRINT("-> state %zu (eom),\n", (size_t)current_state);
552 break;
553 }
554 current_state = m_matrix[current_idx];
555 JAU_TRACE_PRINT("-> state %zu (ok),\n", (size_t)current_state);
556 } while( current_state && c != m_end );
557
558 if( c == m_end && current_state ) {
559 JAU_TRACE_PRINT(" -> final token %zu\n", (size_t)current_state);
560 return current_state;
561 } else {
562 JAU_TRACE_PRINT(" -> not found\n");
563 return token_error;
564 }
565 }
566
567 std::string fsm_to_string(const int token_per_row) const noexcept {
568 const uint_t sz = m_matrix.size();
569 const uint_t rows = sz / m_row_len;
570
571 std::string s = "token_fsm["+m_alphabet.to_string()+", "+std::to_string(count())+" token, sz "+
572 std::to_string(sz)+" cells / "+std::to_string(sz*sizeof(uint_t))+
573 " bytes, "+std::to_string(m_row_len)+"x"+std::to_string(rows)+
574 ", next_state "+std::to_string(m_next_state)+":";
575 char buf[80];
576 uint_t idx=0;
577 for(uint_t y=0; y<rows && idx<sz; ++y) {
578 snprintf(buf, sizeof(buf), "\n%3zu: ", (size_t)y);
579 s.append(buf);
580 for(uint_t x=0; x<m_row_len && idx<sz; ++x, ++idx) {
581 const uint_t t = m_matrix[m_row_len*y+x];
582 snprintf(buf, sizeof(buf), "%3zu, ", (size_t)t);
583 s.append(buf);
584 if( x < m_row_len-1 && ( x + 1 ) % token_per_row == 0 ) {
585 s.append("\n ");
586 }
587 }
588 }
589 s.append("]\n");
590 return s;
591 }
592
593 std::string to_string() const noexcept {
594 const uint_t sz = m_matrix.size();
595 const uint_t rows = sz / m_row_len;
596 return "token_fsm["+m_alphabet.to_string()+", "+std::to_string(count())+" token, sz "+
597 std::to_string(sz)+" cells / "+std::to_string(sz*sizeof(uint_t))
598 +" bytes, "+std::to_string(m_row_len)+"x"+std::to_string(rows)+
599 ", next_state "+std::to_string(m_next_state)+"]";
600 }
601 };
602
603 /**@}*/
604
605} // namespace jau::lang
606
607
608
609#endif /* JAU_TOKEN_FSM_HPP_ */
Implementation of a dynamic linear array storage, aka vector, including relative positional access.
Definition darray.hpp:153
constexpr size_type size() const noexcept
Like std::vector::size().
Definition darray.hpp:1069
constexpr self_t & reserve(size_type new_capacity)
Like std::vector::reserve(), increases this instance's capacity to new_capacity.
Definition darray.hpp:1146
constexpr self_t & resize(size_type new_size, const value_type &val)
Like std::vector::resize(size_type, const value_type&)
Definition darray.hpp:1156
Base Alphabet Specification providing the alphabet for token_fsm.
Definition token_fsm.hpp:75
uint16_t code_point_t
Unsigned int symbol for alphabet code-point type.
Definition token_fsm.hpp:80
constexpr const std::string & name() const noexcept
Human readable name for this alphabet instance.
Definition token_fsm.hpp:99
code_point_t(* code_point_func)(const char c) noexcept
Definition token_fsm.hpp:87
std::string to_string() const noexcept
constexpr code_point_t base() const noexcept
The fixed base used for this alphabet, i.e.
constexpr code_point_t code_point(const char c) const noexcept
Returns the token of the given character or code_error if not element of this alphabet.
static constexpr const code_point_t code_error
token_error value, denoting an invalid alphabet code-point.
Definition token_fsm.hpp:85
alphabet(std::string _name, code_point_t _base, code_point_func _cpf) noexcept
Definition token_fsm.hpp:95
bool is_separator(const char c) const noexcept
Returns true if the given char is listed as a separator.
uint_t state_count() const noexcept
std::string to_string() const noexcept
uint_t get(const std::string_view &word) noexcept
Returns the token numerical name (terminal symbol) if found, otherwise token_error.
static constexpr const uint_t token_error
token_error value, denoting an invalid token or alphabet code-point.
State_type uint_t
Unsigned int symbol for token-value type.
size_t count() const noexcept
Returns the number of contained token.
token_fsm & operator=(const token_fsm &x) noexcept=default
uint_t next_state() const noexcept
bool empty() const noexcept
bool contains(uint_t token_name) const noexcept
Returns true if this FSM containes the given token name.
std::string fsm_to_string(const int token_per_row) const noexcept
token_fsm(const alphabet &alphabet, const std::vector< token_value_t > &key_words, const std::string_view separators="\040\011\012\015")
Constructs a new instance w/ given token_value_t name and value pairs.
bool add(const token_value_t &tkey_word)
Adds given token_value_t name and value pair.
static constexpr uint_t to_symbol(char c) noexcept
token_fsm & operator=(token_fsm &&x) noexcept=default
void clear() noexcept
Clears the FSM.
token_fsm(const token_fsm &src) noexcept=default
token_fsm(alphabet alphabet, const std::string_view separators="\040\011\012\015")
Constructs an empty instance.
result_t find(const std::string_view &haystack, int start=0) noexcept
Find a token within the given haystack, starting from given start position.
token_fsm(token_fsm &&src) noexcept=default
uint_fast32_t nsize_t
Natural 'size_t' alternative using uint_fast32_t as its natural sized type.
Definition int_types.hpp:55
bool operator==(const alphabet &lhs, const alphabet &rhs) noexcept
bool operator!=(const alphabet &lhs, const alphabet &rhs) noexcept
std::string to_string(const alphabet &v) noexcept
STL namespace.
Result type for token_fsm::find()
std::string to_string() const noexcept
size_t source_begin
Position of first char of token in source.
uint_t token_name
Token numerical name (terminal symbol) if found, otherwise token_error.
size_t source_last
Last position in source after token.
Terminal token name and ASCII string value pair, provided by user.
std::string_view value
Token ASCII string value to be tokenized.
uint_t name
Token numerical name, a terminal symbol.
std::string to_string() const noexcept
#define JAU_TRACE_PRINT(...)
Definition token_fsm.hpp:52