jaulib v1.4.0-2-g788cf73
Jau Support Library (C++, Java, ..)
Loading...
Searching...
No Matches
string_util.hpp
Go to the documentation of this file.
1/*
2 * Author: Sven Gothel <sgothel@jausoft.com>
3 * Copyright (c) 2021 Gothel Software e.K.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be
14 * included in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef JAU_STRING_UTIL_HPP_
26#define JAU_STRING_UTIL_HPP_
27
28#include <algorithm>
29#include <concepts>
30#include <cstdarg>
31#include <cstdint>
32#include <cstring>
33#include <string>
34#include <string_view>
35#include <type_traits>
36#include <unordered_map>
37#include <unordered_set>
38#include <vector>
39
40#include "jau/basic_types.hpp"
41#include "jau/type_info.hpp"
42
43#include <jau/byte_util.hpp>
44#include <jau/cpp_lang_util.hpp>
45#include <jau/int_math.hpp>
46#include <jau/int_types.hpp>
48#include <jau/string_cfmt.hpp>
50#include <jau/type_concepts.hpp>
51
52namespace jau {
53
54 /** @defgroup StringUtils String Utilities
55 * String utilities for type conversion and manipulation.
56 *
57 * @{
58 */
59
/**
 * Returns true if `c` is classified as a control or a printable character
 * by std::iscntrl() or std::isprint().
 *
 * Values which are not representable as `unsigned char` (including EOF)
 * yield false.
 *
 * @param c the character code to test
 * @return true if `c` is a control or printable character, otherwise false
 */
inline bool is_ascii_code(int c) noexcept {
    // The <cctype> classifiers have undefined behavior for arguments that are neither
    // representable as unsigned char nor equal to EOF, hence reject those up front.
    // EOF itself is neither control nor printable, so rejecting it here changes nothing.
    if ( c != static_cast<int>(static_cast<unsigned char>(c)) ) {
        return false;
    }
    return 0 != std::iscntrl(c) || 0 != std::isprint(c);
}
63
64 /**
65 * Returns a C++ String taken from buffer with maximum length of min(buffer_len, max_len).
66 * <p>
67 * The maximum length only delimits the string length and does not contain the EOS null byte.
68 * An EOS null byte will be added.
69 * </p>
70 * <p>
71 * The source string within buffer is not required to contain an EOS null byte.
72 * </p>
73 */
74 std::string get_string(const uint8_t *buffer, nsize_t const buffer_len, nsize_t const max_len) noexcept;
75
76 /** Trims the given string `s` in place. */
77 void trimInPlace(std::string &s) noexcept;
78
79 /** Returns a trimmed copy of the given string `s`; `s` itself is passed as const reference. */
80 std::string trim(const std::string &s) noexcept;
81
82 /** Split given string `str` at `separator` into the resulting std::vector excluding the separator sequence. */
83 std::vector<std::string> split_string(const std::string &str, const std::string &separator) noexcept;
84
/** Lower-case conversion operating on `s` itself; returns a string reference for chaining (presumably `s`, implementation not visible here). */
85 std::string &toLowerInPlace(std::string &s) noexcept;
86
/** Lower-case conversion returning a new string; `s` itself is passed as const reference. */
87 std::string toLower(const std::string &s) noexcept;
88
89 /**
90 // *************************************************
91 // *************************************************
92 // *************************************************
93 */
94
/** Letter-case selector, e.g. used as `capitalization` argument of toHexString(). */
enum class LoUpCase : bool {
    /** Upper-case letters, underlying value `true`. */
    upper = true,
    /** Lower-case letters, underlying value `false`. */
    lower = false
};
99
/** Prefix selector, e.g. used as `prefix` argument of toHexString() and toBitString(). */
enum class PrefixOpt : bool {
    /** Request a leading prefix, underlying value `true`. */
    prefix = true,
    /** No leading prefix, underlying value `false`. */
    none = false
};
104
105 /**
106 * Converts a given hexadecimal string representation into a byte vector, lsb-first.
107 *
108 * In case an invalid hexadecimal digit appears in the given string,
109 * conversion ends and fills the byte vector up until the violation.
110 *
111 * In case hexstr contains an odd number of hex-nibbles, it will be interpreted as follows
112 * - 0xf[12] = 0x0f12 = { 0x12, 0x0f } - msb, 1st single low-nibble is most significant
113 * - [12]f = 0xf012 = { 0x12, 0xf0 } - lsb, last single high-nibble is most significant
114 *
115 * Even if complete==false, result holds the partial value if consumed_chars>0.
116 *
117 * You may use C++17 structured bindings to handle the pair.
118 *
119 * @param out the byte vector sink, lsb-first
120 * @param hexstr the hexadecimal string representation
121 * @param hexstr_len length of hexstr
122 * @param byteOrder lb_endian_t::big for big-endian bytes in `hexstr` (default)
123 * @param checkPrefix if True, checks for a leading `0x` and removes it, otherwise not.
124 * @return pair [size_t consumed_chars, bool complete], i.e. consumed characters of string and complete=false if not fully consumed.
125 */
126 SizeBoolPair fromHexString(std::vector<uint8_t> &out, const uint8_t hexstr[], const size_t hexstr_len,
127 const lb_endian_t byteOrder = lb_endian_t::big, const Bool checkPrefix = Bool::True) noexcept;
128
129 /** See hexStringBytes() */
130 inline SizeBoolPair fromHexString(std::vector<uint8_t> &out, const std::string_view hexstr,
131 const lb_endian_t byteOrder = lb_endian_t::big, const Bool checkPrefix = Bool::True) noexcept {
132 return jau::fromHexString(out, cast_char_ptr_to_uint8(hexstr.data()), hexstr.length(), byteOrder, checkPrefix); // NOLINT(bugprone-suspicious-stringview-data-usage)
133 }
134
135 /**
136 * Converts a given hexadecimal string representation into a uint64_t value according to the byte vector based fromHexString().
137 *
138 * Even if complete==false, result holds the partial value if consumed_chars>0.
139 *
140 * You may use C++17 structured bindings to handle the tuple.
141 *
142 * @param hexstr the hexadecimal string representation
143 * @param byteOrder lb_endian_t::big for big-endian bytes in `hexstr` (default)
144 * @param checkPrefix if True, checks for a leading `0x` and removes it, otherwise not.
145 * @return tuple [uint64_t result, size_t consumed_chars, bool complete], i.e. consumed characters of string and complete=false if not fully consumed.
146 * @see fromHexString()
147 * @see toHexString()
148 */
149 UInt64SizeBoolTuple fromHexString(std::string_view const hexstr, const lb_endian_t byteOrder = lb_endian_t::big,
150 const Bool checkPrefix = Bool::True) noexcept;
151
/** Lower-case hexadecimal digit characters, indexed by the digit value 0..15. */
inline constexpr const char *HexadecimalArray{ "0123456789abcdef" };
153
154 /**
155 * Produce a hexadecimal string representation of the given lsb-first byte values.
156 *
157 * If byteOrder is lb_endian_t::little, orders lsb-byte left, usual for byte streams. Result will not have a leading `0x`.
158 * Otherwise, lb_endian_t::big (default), orders msb-byte left for integer values. Result will have a leading `0x` if PrefixOpt::prefix is given.
159 *
160 * @param data pointer to the first byte to print, lsb-first
161 * @param length number of bytes to print
162 * @param byteOrder lb_endian_t::big for big-endian bytes in resulting hex-string (default).
163 * A leading `0x` will be prepended if `byteOrder == lb_endian_t::big` and `PrefixOpt::prefix` given.
164 * @param capitalization LoUpCase capitalization, default is LoUpCase::lower
165 * @param prefix pass PrefixOpt::prefix (default) to add leading `0x` if `byteOrder == lb_endian_t::big` (default)
166 * @return the hex-string representation of the data
167 */
168 std::string toHexString(const void *data, const nsize_t length,
169 const lb_endian_t byteOrder = lb_endian_t::big, const LoUpCase capitalization = LoUpCase::lower,
170 const PrefixOpt prefix = PrefixOpt::prefix) noexcept;
171
172 /**
173 * Produces a hexadecimal string representation of the given byte value and appends it to the given string.
174 * @param dest the std::string reference destination to append to
175 * @param value the byte value to represent
176 * @param capitalization LoUpCase capitalization, default is LoUpCase::lower
177 * @return the given std::string reference for chaining
178 */
179 std::string &appendToHexString(std::string &dest, const uint8_t value, const LoUpCase capitalization = LoUpCase::lower) noexcept;
180
181 /**
182 * Produce a hexadecimal string representation (lower-case by default) with leading `0x` in MSB of the given pointer.
183 *
184 * NOTE(review): the template constraint lines between `template<class value_type>` and the
185 * signature are not part of this listing - confirm against the real header.
186 *
187 * @tparam value_type a pointer type
188 * @param v the pointer of given pointer type
189 * @param byteOrder lb_endian_t::big for big-endian bytes in resulting hex-string (default).
190 * A leading `0x` will be prepended if `byteOrder == lb_endian_t::big` and `PrefixOpt::prefix` given.
191 * @param capitalization LoUpCase capitalization, default is LoUpCase::lower
192 * @param prefix pass PrefixOpt::prefix (default) to add leading `0x` if `byteOrder == lb_endian_t::big` (default)
193 * @return the hex-string representation of the value
194 * @see toHexString()
195 * @see fromHexString()
196 */
193 template<class value_type>
196 inline std::string toHexString(value_type const &v, const lb_endian_t byteOrder = lb_endian_t::big,
197 const LoUpCase capitalization = LoUpCase::lower,
198 const PrefixOpt prefix = PrefixOpt::prefix) noexcept
199 {
200#if defined(__EMSCRIPTEN__) // jau::os::is_generic_wasm()
201 static_assert(is_little_endian()); // Bug in emscripten, unable to deduce uint16_t, uint32_t or uint64_t override of cpu_to_le() or bswap()
// Little-endian is asserted above, hence the pointer's integer value is used as-is.
202 const uintptr_t v_le = reinterpret_cast<uintptr_t>(v);
203 return toHexString(pointer_cast<const uint8_t *>(&v_le), sizeof(v), // NOLINT(bugprone-sizeof-expression): Intended
204 byteOrder, capitalization, prefix);
205#else
// Pass the pointer's integer value through cpu_to_le(), the byte based toHexString() takes lsb-first bytes.
206 const uintptr_t v_le = jau::cpu_to_le(reinterpret_cast<uintptr_t>(v));
207 return toHexString(pointer_cast<const uint8_t *>(&v_le), sizeof(v), // NOLINT(bugprone-sizeof-expression): Intended
208 byteOrder, capitalization, prefix);
209#endif
210 }
211
212 /**
213 * Produce a hexadecimal string representation (lower-case by default) with leading `0x` in MSB of the given uint8_t continuous container values.
214 * @tparam uint8_container_type a uint8_t continuous container type
215 * @param bytes the value of given uint8_t continuous container type
216 * @param byteOrder lb_endian_t::big for big-endian bytes in resulting hex-string (default).
217 * A leading `0x` will be prepended if `byteOrder == lb_endian_t::big` and `PrefixOpt::prefix` given.
218 * @param capitalization LoUpCase capitalization, default is LoUpCase::lower
219 * @param skipPrefix despite its name a PrefixOpt forwarded as `prefix`: pass PrefixOpt::prefix (default) to add leading `0x` if `byteOrder == lb_endian_t::big` (default)
220 * @return the hex-string representation of the value
221 * @see toHexString()
222 * @see fromHexString()
223 */
224 template<class uint8_container_type>
226 std::convertible_to<typename uint8_container_type::value_type, uint8_t>
227 inline std::string toHexString(const uint8_container_type &bytes,
228 const lb_endian_t byteOrder = lb_endian_t::big, const LoUpCase capitalization = LoUpCase::lower,
229 const PrefixOpt skipPrefix = PrefixOpt::prefix) noexcept
230 {
// Forwards the container's data pointer and element count to the byte based toHexString().
231 return toHexString((const uint8_t *)bytes.data(), bytes.size(), byteOrder, capitalization, skipPrefix);
232 }
233
234 /**
235 * Produce a hexadecimal string representation (lower-case by default) with leading `0x` in MSB of the given value with standard layout.
236 * @tparam value_type a standard layout value type
237 * @param v the value of given standard layout type
238 * @param byteOrder lb_endian_t::big for big-endian bytes in resulting hex-string (default).
239 * A leading `0x` will be prepended if `byteOrder == lb_endian_t::big` and `PrefixOpt::prefix` given.
240 * @param capitalization LoUpCase capitalization, default is LoUpCase::lower
241 * @param prefix pass PrefixOpt::prefix (default) to add leading `0x` if `byteOrder == lb_endian_t::big` (default)
242 * @return the hex-string representation of the value
243 * @see toHexString()
244 * @see fromHexString()
245 */
246 template<class value_type>
251 inline std::string toHexString(value_type const &v, const lb_endian_t byteOrder = lb_endian_t::big,
252 const LoUpCase capitalization = LoUpCase::lower,
253 const PrefixOpt prefix = PrefixOpt::prefix) noexcept {
254 if constexpr ( is_little_endian() ) {
// Little-endian host: the value's bytes are passed as-is.
255 return toHexString(pointer_cast<const uint8_t *>(&v), sizeof(v),
256 byteOrder, capitalization, prefix);
257 } else {
// Otherwise pass a byte-swapped copy, the byte based toHexString() takes lsb-first bytes.
258 const value_type v_le = jau::bswap(v);
259 return toHexString(pointer_cast<const uint8_t *>(&v_le), sizeof(v),
260 byteOrder, capitalization, prefix);
261 }
262 }
263
264 /**
265 // *************************************************
266 // *************************************************
267 // *************************************************
268 */
269
270 /**
271 * Converts a given binary string representation into a byte vector, lsb-first.
272 *
273 * In case an invalid binary digit appears in the given string,
274 * conversion ends and fills the byte vector up until the violation.
275 *
276 * In case bitstr contains an incomplete number of bit-nibbles, it will be interpreted as follows
277 * - 0b11[00000001] = 0x0301 = { 0x01, 0x03 } - msb, 1st single low-nibble is most significant
278 * - 0b[01000000]11 = 0xC040 = { 0x40, 0xC0 } - lsb, last single high-nibble is most significant
279 * - 11 -> 11000000 -> C0
280 *
281 * Even if complete==false, result holds the partial value if consumed_chars>0.
282 *
283 * You may use C++17 structured bindings to handle the pair.
284 *
285 * @param out the byte vector sink, lsb-first
286 * @param bitstr the binary string representation
287 * @param bitstr_len length of bitstr
288 * @param bitOrder bit_order_t::msb for most significant bits in `bitstr` first (default)
289 * @param checkPrefix if True, checks for a leading `0b` and removes it, otherwise not.
290 * @return pair [size_t consumed_chars, bool complete], i.e. consumed characters of string and complete=false if not fully consumed.
291 */
292 SizeBoolPair fromBitString(std::vector<uint8_t> &out, const uint8_t bitstr[], const size_t bitstr_len,
293 const bit_order_t bitOrder = bit_order_t::msb, const Bool checkPrefix = Bool::True) noexcept;
294
295 /** See fromBitString() */
296 inline SizeBoolPair fromBitString(std::vector<uint8_t> &out, const std::string_view bitstr,
297 const bit_order_t bitOrder = bit_order_t::msb, const Bool checkPrefix = Bool::True) noexcept {
298 return jau::fromBitString(out, cast_char_ptr_to_uint8(bitstr.data()), bitstr.length(), bitOrder, checkPrefix); // NOLINT(bugprone-suspicious-stringview-data-usage)
299 }
300
301 /**
302 * Converts a given binary string representation into a uint64_t value according to the byte vector based fromBitString().
303 *
304 * Even if complete==false, result holds the partial value if consumed_chars>0.
305 *
306 * You may use C++17 structured bindings to handle the tuple.
307 *
308 * @param bitstr the binary string representation
309 * @param bitOrder bit_order_t::msb for most significant bits in `bitstr` first (default)
310 * @param checkPrefix if True, checks for a leading `0b` and removes it, otherwise not.
311 * @return tuple [uint64_t result, size_t consumed_chars, bool complete], i.e. consumed characters of string and complete=false if not fully consumed.
312 * @see fromBitString()
313 * @see toBitString()
314 */
315 UInt64SizeBoolTuple fromBitString(std::string_view const bitstr, const bit_order_t bitOrder = bit_order_t::msb, const Bool checkPrefix = Bool::True) noexcept;
316
317 /**
318 * Produce a binary string representation of the given lsb-first byte values.
319 *
320 * If byteOrder is lb_endian_t::little, orders lsb-byte left, usual for byte streams. Result will not have a leading `0b`.
321 * Otherwise, lb_endian_t::big (default), orders msb-byte left for integer values. Result will have a leading `0b` if PrefixOpt::prefix is given.
322 *
323 * NOTE(review): the paragraph above speaks of `byteOrder`, while the documented parameter is `bitOrder`;
324 * the signature line holding these parameters is not part of this listing - confirm against the real header.
325 *
326 * @param data pointer to the first byte to print, lsb-first
327 * @param length number of bytes to print
328 * @param bitOrder bit_order_t::msb for most-significant-bit first in resulting bit-string, bit_order_t::msb is default
329 * A leading `0b` will be prepended if `bitOrder == bit_order_t::msb` and `PrefixOpt::prefix` given.
330 * @param prefix pass PrefixOpt::prefix (default) to add leading `0b` if `bitOrder == bit_order_t::msb` (default)
331 * @param bit_len optional fixed number of bits to be printed counting from lsb excluding prefix. Pass zero for dropping zero leading bytes (default).
332 * @return the bit-string representation of the data
333 */
331 std::string toBitString(const void *data, const nsize_t length,
333 size_t bit_len=0) noexcept;
334
335 /**
336 * Produce a binary string representation with leading `0b` in MSB of the given uint8_t continuous container values.
337 * @tparam uint8_container_type a uint8_t continuous container type
338 * @param bytes the value of given uint8_t continuous container type
339 * @param bitOrder bit_order_t::msb for most-significant-bit first in resulting bit-string, bit_order_t::msb is default
340 * A leading `0b` will be prepended if `bitOrder == bit_order_t::msb` and `PrefixOpt::prefix` given.
341 * @param prefix pass PrefixOpt::prefix (default) to add leading `0b` if `bitOrder == bit_order_t::msb` (default)
342 * @param bit_len optional fixed number of bits to be printed counting from lsb excluding prefix. Pass zero for dropping zero leading bytes (default).
343 * @return the bit-string representation of the value
344 * @see bytesBitString()
345 * @see from_bitstring()
346 */
347 template<class uint8_container_type>
348 requires jau::req::contiguous_container<uint8_container_type> &&
349 std::convertible_to<typename uint8_container_type::value_type, uint8_t>
350 inline std::string toBitString(const uint8_container_type &bytes,
351 const bit_order_t bitOrder = bit_order_t::msb, const PrefixOpt prefix = PrefixOpt::prefix, size_t bit_len=0) noexcept {
352 return toBitString((const uint8_t *)bytes.data(), bytes.size(), bitOrder, prefix, bit_len);
353 }
354
355 /**
356 * Produce a binary string representation with leading `0b` in MSB of the given value with standard layout.
357 * @tparam value_type a standard layout value type
358 * @param v the value of given standard layout type
359 * @param bitOrder bit_order_t::msb for most-significant-bit first in resulting bit-string, bit_order_t::msb is default
360 * A leading `0b` will be prepended if `bitOrder == bit_order_t::msb` and `PrefixOpt::prefix` given.
361 * @param prefix pass PrefixOpt::prefix (default) to add leading `0b` if `bitOrder == bit_order_t::msb` (default)
362 * @param bit_len optional fixed number of bits to be printed counting from lsb excluding prefix. Pass zero for dropping zero leading bytes (default).
363 * @return the bit-string representation of the value
364 * @see toBitString()
365 * @see fromBitString()
366 */
367 template<class value_type>
372 inline std::string toBitString(value_type const &v, const bit_order_t bitOrder = bit_order_t::msb,
373 const PrefixOpt prefix = PrefixOpt::prefix, size_t bit_len=0) noexcept
374 {
375 if constexpr ( is_little_endian() ) {
// Little-endian host: the value's bytes are passed as-is.
376 return toBitString(pointer_cast<const uint8_t *>(&v), sizeof(v),
377 bitOrder, prefix, bit_len);
378 } else {
// Otherwise pass a byte-swapped copy, the byte based toBitString() takes lsb-first bytes.
379 const value_type v_le = jau::bswap(v);
380 return toBitString(pointer_cast<const uint8_t *>(&v_le), sizeof(v),
381 bitOrder, prefix, bit_len);
382 }
383 }
384
385 /**
386 // *************************************************
387 // *************************************************
388 // *************************************************
389 */
390
391 /**
392 * Produce a decimal string representation of an integral integer value.
393 * @tparam value_type an integral integer type
394 * @param v the integral integer value
395 * @param separator if not 0, use as separation character, otherwise no separation characters are being used
396 * @param width the minimum number of characters to be printed. Add padding with blank space if result is shorter.
397 * @return the string representation of the integral integer value
398 */
399 template<class value_type,
400 std::enable_if_t<std::is_integral_v<value_type>,
401 bool> = true>
402 std::string to_decstring(const value_type &v, const char separator = ',', const nsize_t width = 0) noexcept {
403 const snsize_t v_sign = jau::sign<value_type>(v);
404 const size_t digit10_count1 = jau::digits10<value_type>(v, v_sign, true /* sign_is_digit */);
405 const size_t digit10_count2 = v_sign < 0 ? digit10_count1 - 1 : digit10_count1; // less sign
406
407 const size_t separator_count = separator ? (digit10_count1 - 1) / 3 : 0;
408 const size_t net_chars = digit10_count1 + separator_count;
409 const size_t total_chars = std::max<size_t>(width, net_chars);
410 std::string res(total_chars, ' ');
411
412 value_type n = v;
413 size_t char_iter = 0;
414
415 for ( size_t digit10_iter = 0; digit10_iter < digit10_count2 /* && char_iter < total_chars */; digit10_iter++ ) {
416 const int digit = v_sign < 0 ? invert_sign(n % 10) : n % 10;
417 n /= 10;
418 if ( separator && 0 < digit10_iter && 0 == digit10_iter % 3 ) {
419 res[total_chars - 1 - (char_iter++)] = separator;
420 }
421 res[total_chars - 1 - (char_iter++)] = '0' + digit;
422 }
423 if ( v_sign < 0 /* && char_iter < total_chars */ ) {
424 res[total_chars - 1 - (char_iter++)] = '-';
425 }
426 return res;
427 }
428
429 /**
430 // *************************************************
431 // *************************************************
432 // *************************************************
433 */
434
435 /**
436 * Produce a string representation of an unsigned integral integer value with given radix.
437 * @tparam value_type an unsigned integral integer type
438 * @param v the unsigned integral integer value
439 * @param radix base of the number system, supported: 2 binary, 8 octal, 10 decimal, 16 hexadecimal
440 * @param prefix pass PrefixOpt::prefix (default) to add leading prefix for radix. Prefixes: `0x` hex, `0` octal and `0b` binary.
441 * @param min_width the minimum number of characters to be printed including prefix. Add padding with `padding` if result is shorter.
442 * @param separator separator character for each decimal 3 or other radix 4. Defaults to 0 for no separator.
443 * @param padding padding character, defaults to '0'. See 'min_width' above.
444 * @return the string representation of the unsigned integral integer value with given radix
445 */
446 template<class value_type,
447 std::enable_if_t<std::is_integral_v<value_type> &&
448 std::is_unsigned_v<value_type>,
449 bool> = true>
450 std::string to_string(value_type v, const nsize_t radix, const PrefixOpt prefix = PrefixOpt::prefix,
451 size_t min_width = 0, const char separator = 0, const char padding = '0') noexcept
452 {
453 nsize_t shift;
454 switch ( radix ) {
455 case 16: shift = 4; break;
456 case 10: shift = 0; break;
457 case 8: shift = 3; break;
458 case 2: shift = 1; break;
459 default: return "";
460 }
461 const nsize_t mask = radix - 1; // ignored for radix 10
462 const size_t val_digits = jau::digits<value_type>(v, radix);
463 const size_t prefix_len = (PrefixOpt::none == prefix || 10 == radix) ? 0 : (8 == radix ? 1 : 2);
464 const size_t separator_gap = 10 == radix ? 3 : 4;
465 size_t separator_count;
466 if ( separator && '0' == padding ) {
467 // separator inside padding
468 if ( min_width > prefix_len ) {
469 const size_t len0 = std::max<size_t>(min_width - prefix_len, val_digits);
470 separator_count = (len0 - 1) / separator_gap;
471 if ( val_digits + separator_count + prefix_len > min_width ) {
472 --separator_count; // fix down
473 }
474 } else {
475 separator_count = (val_digits - 1) / separator_gap;
476 }
477 } else if ( separator ) {
478 // separator w/o padding
479 separator_count = (val_digits - 1) / separator_gap;
480 } else {
481 separator_count = 0;
482 }
483 size_t len = std::max<size_t>(min_width, val_digits + separator_count + prefix_len);
484
485 std::string str(len, ' ');
486 size_t digit_idx = 0, separator_idx = 0;
487 while ( len > prefix_len ) {
488 if ( separator_idx < separator_count && 0 < digit_idx && 0 == digit_idx % separator_gap ) {
489 str[--len] = separator;
490 ++separator_idx;
491 }
492 if ( len > prefix_len ) {
493 if ( 10 != radix ) {
494 str[--len] = digit_idx < val_digits ? HexadecimalArray[v & mask] : padding;
495 v >>= shift;
496 } else {
497 str[--len] = digit_idx < val_digits ? '0' + (v % 10) : padding;
498 v /= 10;
499 }
500 ++digit_idx;
501 }
502 }
503 if ( len > 0 ) {
504 switch ( radix ) { // NOLINT(bugprone-switch-missing-default-case)
505 case 16: str[--len] = 'x'; break;
506 case 8: str[--len] = '0'; break;
507 case 2: str[--len] = 'b'; break;
508 }
509 if ( len > 0 ) {
510 str[--len] = '0';
511 }
512 }
513 return str;
514 }
515
516 /**
517 // *************************************************
518 // *************************************************
519 // *************************************************
520 */
521
522 namespace impl {
/**
 * Formats `args` according to `format` via std::snprintf() into a string limited to `maxStrLen` characters.
 * No validation of `format` against `args` is performed here.
 *
 * NOTE(review): some lines around the std::snprintf() calls of this namespace are not part of this listing.
 */
523 template<typename... Args>
524 constexpr std::string format_string_n(const std::size_t maxStrLen, const std::string_view &format, const Args &...args) {
525 std::string str;
526 str.reserve(maxStrLen + 1); // incl. EOS
527 str.resize(maxStrLen); // excl. EOS
528
529 // -Wformat=2 -> -Wformat -Wformat-nonliteral -Wformat-security -Wformat-y2k
530 // -Wformat=2 -Wformat-overflow=2 -Wformat-signedness
// NOTE(review): format.data() is used as a C string, while std::string_view does not guarantee EOS termination - confirm callers.
// NOTE(review): std::snprintf() returns int; a negative error value becomes a huge size_t here, skipping the resize below.
534 const size_t nchars = std::snprintf(&str[0], maxStrLen + 1, format.data(), args...); // NOLINT
536 if ( nchars < maxStrLen + 1 ) {
// Output fits: shrink to the number of characters actually written.
537 str.resize(nchars);
538 str.shrink_to_fit();
539 } // else truncated w/ nchars > MaxStrLen
540 return str;
541 }
542
/**
 * Formats `args` according to `format` via std::snprintf(), first using a buffer of `strLenHint` characters
 * and, if that is too small, a second pass with a buffer sized by the first pass' result.
 * No validation of `format` against `args` is performed here.
 */
543 template <typename... Args>
544 constexpr std::string format_string_h(const std::size_t strLenHint, const std::string_view format, const Args &...args) {
545 size_t nchars;
546 std::string str;
547 {
// 1st pass using the hinted size
548 const size_t bsz = strLenHint + 1; // including EOS
549 str.reserve(bsz); // incl. EOS
550 str.resize(bsz - 1); // excl. EOS
551
552 // -Wformat=2 -> -Wformat -Wformat-nonliteral -Wformat-security -Wformat-y2k
553 // -Wformat=2 -Wformat-overflow=2 -Wformat-signedness
// NOTE(review): same int to size_t conversion and EOS termination remarks as in format_string_n() apply.
557 nchars = std::snprintf(&str[0], bsz, format.data(), args...); // NOLINT
559 if ( nchars < bsz ) {
560 str.resize(nchars);
561 str.shrink_to_fit();
562 return str;
563 }
564 }
565 {
// 2nd pass using the size reported by the 1st pass, limited by the string's max_size()
566 const size_t bsz = std::min<size_t>(nchars + 1, str.max_size() + 1); // limit incl. EOS
567 str.reserve(bsz); // incl. EOS
568 str.resize(bsz - 1); // excl. EOS
569
570 // -Wformat=2 -> -Wformat -Wformat-nonliteral -Wformat-security -Wformat-y2k
571 // -Wformat=2 -Wformat-overflow=2 -Wformat-signedness
575 nchars = std::snprintf(&str[0], bsz, format.data(), args...); // NOLINT
577
578 str.resize(nchars);
579 return str;
580 }
581 }
582 } // namespace impl
583
584 /**
585 * Safely returns a (potentially truncated) string according to `snprintf()` formatting rules
586 * and variable number of arguments following the `format` argument.
587 *
588 * jau::cfmt2::checkR2() is utilize to validate `format` against given arguments at *runtime*
589 * and throws jau::IllegalArgumentError on mismatch.
590 *
591 * Resulting string is truncated to `min(maxStrLen, formatLen)`,
592 * with `formatLen` being the given formatted string length of output w/o limitation.
593 *
594 * @param maxStrLen maximum resulting string length
595 * @param format `printf()` compliant format string
596 * @param args optional arguments matching the format string
597 */
598 template<typename... Args>
599 constexpr std::string format_string_n(const std::size_t maxStrLen, const std::string_view &format, const Args &...args) {
600 const jau::cfmt::PResult pr = jau::cfmt::checkR2<Args...>(format);
601 if ( pr.argCount() < 0 ) {
602 throw jau::IllegalArgumentError("format/arg mismatch `" + std::string(format) + "`: " + pr.toString(), E_FILE_LINE);
603 }
604 return impl::format_string_n(maxStrLen, format, args...);
605 }
606
607 /**
608 * Safely returns a (potentially truncated) string according to `snprintf()` formatting rules
609 * and variable number of arguments following the `format` argument.
610 *
611 * jau::cfmt2::checkR2() is utilize to validate `format` against given arguments at *compile time*
612 * and fails to compile on mismatch.
613 *
614 * Resulting string is truncated to `min(maxStrLen, formatLen)`,
615 * with `formatLen` being the given formatted string length of output w/o limitation.
616 *
617 * @tparam format `printf()` compliant format string
618 * @param maxStrLen maximum resulting string length
619 * @param args optional arguments matching the format string
620 */
621 template <StringLiteral format, typename... Args>
622 consteval_cxx20 std::string format_string_n(const std::size_t maxStrLen, const Args &...args) {
623 static_assert(0 <= jau::cfmt::checkR2<Args...>(format.view()).argCount());
624 return impl::format_string_n(maxStrLen, format.view(), args...);
625 }
626
627 /**
628 * Safely returns a (non-truncated) string according to `snprintf()` formatting rules
629 * and variable number of arguments following the `format` argument.
630 *
631 * jau::cfmt2::checkR2() is utilize to validate `format` against given arguments at *runtime*
632 * and throws jau::IllegalArgumentError on mismatch.
633 *
634 * Resulting string size matches formated output w/o limitation.
635 *
636 * @param strLenHint initially used string length w/o EOS
637 * @param format `printf()` compliant format string
638 * @param args optional arguments matching the format string
639 */
640 template <typename... Args>
641 constexpr std::string format_string_h(const std::size_t strLenHint, const std::string_view format, const Args &...args) {
642 const jau::cfmt::PResult pr = jau::cfmt::checkR2<Args...>(format);
643 if ( pr.argCount() < 0 ) {
644 throw jau::IllegalArgumentError("format/arg mismatch `" + std::string(format) + "`: " + pr.toString(), E_FILE_LINE);
645 }
646 return impl::format_string_h(strLenHint, format, args...);
647 }
648
/**
 * Safely returns a (non-truncated) string according to `snprintf()` formatting rules
 * and variable number of arguments following the `format` argument.
 *
 * Delegates to format_string_h() using an initial string length hint of 1023 characters,
 * hence `format` is validated against the given arguments at *runtime*
 * and jau::IllegalArgumentError is thrown on mismatch.
 *
 * Resulting string size matches formatted output w/o limitation.
 *
 * @param format `printf()` compliant format string
 * @param args optional arguments matching the format string
 */
template <typename... Args>
constexpr std::string format_string(const std::string_view format, const Args &...args) {
    constexpr std::size_t initial_len_hint = 1023;
    return format_string_h(initial_len_hint, format, args...);
}
665
666 /**
667 * Safely returns a (non-truncated) string according to `snprintf()` formatting rules
668 * and variable number of arguments following the `format` argument.
669 *
670 * jau::cfmt2::checkR2() is utilize to validate `format` against given arguments at *compile time*
671 * and fails to compile on mismatch.
672 *
673 * Resulting string size matches formated output w/o limitation.
674 *
675 * @tparam format `printf()` compliant format string
676 * @param args optional arguments matching the format string
677 */
678 template <StringLiteral format, typename... Args>
679 consteval_cxx20 std::string format_string(const Args &...args) {
680 static_assert(0 <= jau::cfmt::checkR2<Args...>(format.view()).argCount());
681 return impl::format_string_h(1023, format.view(), args...);
682 }
683
684 /**
685 // *************************************************
686 // *************************************************
687 // *************************************************
688 */
689
/**
 * Returns the std::to_string() representation of an integral (excluding bool) or floating point value.
 * @tparam value_type an integral type other than bool, or a floating point type
 * @param ref the value to represent
 */
template<class value_type,
         std::enable_if_t<std::is_arithmetic_v<value_type> && !std::is_same_v<bool, value_type>,
                          bool> = true>
inline std::string to_string(const value_type &ref) {
    std::string repr = std::to_string(ref);
    return repr;
}
697
/**
 * Returns the short boolean representation, `T` for true and `F` for false.
 * @tparam value_type bool
 * @param ref the boolean value to represent
 */
template<class value_type,
         std::enable_if_t<std::is_same_v<bool, value_type>,
                          bool> = true>
inline std::string to_string(const value_type &ref) {
    if ( ref ) {
        return "T";
    }
    return "F";
}
704
/**
 * Returns a std::string copy of the given std::string, or of a type derived from it.
 * @tparam value_type std::string or a type derived from std::string
 * @param ref the string to copy
 */
template<class value_type,
         std::enable_if_t<!( std::is_integral_v<value_type> || std::is_floating_point_v<value_type> ) &&
                          std::is_base_of_v<std::string, value_type>,
                          bool> = true>
inline std::string to_string(const value_type &ref) {
    const std::string &as_string = ref;
    return as_string;
}
713
/// Returns a std::string copy of the characters viewed by the std::string_view (or derived) argument.
template<class value_type,
         std::enable_if_t<!( std::is_integral_v<value_type> ||
                             std::is_floating_point_v<value_type> ||
                             std::is_base_of_v<std::string, value_type> ) &&
                          std::is_base_of_v<std::string_view, value_type>,
                          bool> = true>
inline std::string to_string(const value_type &ref) {
    const std::string_view view = ref;
    std::string copy(view);
    return copy;
}
723
724 template<class value_type,
725 std::enable_if_t<!std::is_integral_v<value_type> &&
726 !std::is_floating_point_v<value_type> &&
727 !std::is_base_of_v<std::string, value_type> &&
728 !std::is_base_of_v<std::string_view, value_type> &&
729 std::is_pointer_v<value_type>,
730 bool> = true>
731 inline std::string to_string(const value_type &ref) {
732 return toHexString((void *)ref); // NOLINT(bugprone-multi-level-implicit-pointer-conversion)
733 }
734
735 template<class value_type,
736 std::enable_if_t<!std::is_integral_v<value_type> &&
737 !std::is_floating_point_v<value_type> &&
738 !std::is_base_of_v<std::string, value_type> &&
739 !std::is_base_of_v<std::string_view, value_type> &&
740 !std::is_pointer_v<value_type> &&
742 bool> = true>
743 inline std::string to_string(const value_type &ref) {
744 return ref.toString();
745 }
746
747 template<class value_type,
748 std::enable_if_t<!std::is_integral_v<value_type> &&
749 !std::is_floating_point_v<value_type> &&
750 !std::is_base_of_v<std::string, value_type> &&
751 !std::is_base_of_v<std::string_view, value_type> &&
752 !std::is_pointer_v<value_type> &&
755 bool> = true>
756 inline std::string to_string(const value_type &ref) {
757 return ref.to_string();
758 }
759
760 template<class value_type,
761 std::enable_if_t<!std::is_integral_v<value_type> &&
762 !std::is_floating_point_v<value_type> &&
763 !std::is_base_of_v<std::string, value_type> &&
764 !std::is_base_of_v<std::string_view, value_type> &&
765 !std::is_pointer_v<value_type> &&
769 bool> = true>
770 inline std::string to_string(const value_type &ref) {
771 return toHexString((void *)ref.operator->());
772 }
773
774 template<class value_type,
775 std::enable_if_t<!std::is_integral_v<value_type> &&
776 !std::is_floating_point_v<value_type> &&
777 !std::is_base_of_v<std::string, value_type> &&
778 !std::is_base_of_v<std::string_view, value_type> &&
779 !std::is_pointer_v<value_type> &&
783 bool> = true>
784 inline std::string to_string(const value_type &ref) {
785 (void)ref;
786 return "jau::to_string<T> n/a for type " + jau::static_ctti<value_type>().toString();
787 }
788
/**
 * Joins the `to_string()` representation of all elements of `list`,
 * placing `delim` between each two consecutive elements.
 *
 * @param list the elements to convert
 * @param delim separator inserted between elements
 * @return the joined string, empty if `list` is empty
 */
template<typename T>
std::string to_string(std::vector<T> const &list, const std::string &delim) {
    std::string joined;
    auto it = list.cbegin();
    const auto last = list.cend();
    if ( it == last ) {
        return joined;
    }
    // First element goes in w/o leading delimiter, all others are prefixed by it.
    joined.append(to_string(*it));
    for ( ++it; it != last; ++it ) {
        joined.append(delim);
        joined.append(to_string(*it));
    }
    return joined;
}
/// Joins all elements of `list` via the delimiter variant, using ", " as separator.
template<typename T>
std::string to_string(std::vector<T> const &list) {
    return to_string<T>(list, std::string(", "));
}
807
808 template<typename T>
809 std::string to_string(std::vector<T> const &list, const std::string &delim, const nsize_t radix) {
810 if ( list.empty() ) {
811 return std::string();
812 }
813 bool need_delim = false;
814 std::string res;
815 for ( const T &e : list ) {
816 if ( need_delim ) {
817 res.append(delim);
818 }
819 res.append(to_string(e, radix));
820 need_delim = true;
821 }
822 return res;
823 }
824 template<typename T>
825 std::string to_string(std::vector<T> const &list, const nsize_t radix) { return to_string<T>(list, ", ", radix); }
826
827 /**
828 * Returns tuple [int64_t result, size_t consumed_chars, bool complete] of string to integer conversion via `std::strtoll`.
829 *
830 * Even if complete==false, result holds the partial value if consumed_chars>0.
831 *
832 * You may use C++17 structured bindings to handle the tuple.
833 */
834 Int64SizeBoolTuple to_integer(const char *str, size_t str_len, const nsize_t radix = 10, const char limiter = '\0', const char *limiter_pos = nullptr);
835
836
837 /**
838 * Returns tuple [int64_t result, size_t consumed_chars, bool complete] of string to integer conversion via `std::strtoll`.
839 *
840 * Even if complete==false, result holds the partial value if consumed_chars>0.
841 *
842 * You may use C++17 structured bindings to handle the tuple.
843 */
844 inline Int64SizeBoolTuple to_integer(const std::string_view str, const nsize_t radix = 10, const char limiter = '\0', const char *limiter_pos = nullptr) {
845 return to_integer(str.data(), str.length(), radix, limiter, limiter_pos);
846 }
847
/**
 * C++20: Heterogeneous Lookup in (Un)ordered Containers
 *
 * Hash functor declaring `is_transparent` and accepting `const char*`,
 * `std::string_view` and `std::string` keys alike.
 * All variants hash the character sequence via `std::hash<std::string_view>`,
 * which yields the same value as `std::hash<std::string>` for equal content.
 *
 * @see https://www.cppstories.com/2021/heterogeneous-access-cpp20/
 */
struct string_hash {
    using is_transparent = void;

    [[nodiscard]] size_t operator()(std::string_view txt) const {
        return std::hash<std::string_view>{}(txt);
    }
    [[nodiscard]] size_t operator()(const char *txt) const {
        return (*this)(std::string_view(txt));
    }
    [[nodiscard]] size_t operator()(const std::string &txt) const {
        return (*this)(std::string_view(txt));
    }
};
865
866 template<typename T>
867 using StringHashMap = std::unordered_map<std::string, T, string_hash, std::equal_to<>>;
868
869 using StringHashSet = std::unordered_set<std::string, string_hash, std::equal_to<>>;
870
871 /**@}*/
872
873} // namespace jau
874
/**
 * Expands to a jau::format_string() call with the given arguments,
 * followed by a separate `static_assert` passing the same arguments to
 * jau::cfmt::checkR() and requiring a non-negative argCount().
 *
 * Note: The expansion consists of two statements, the first being terminated by `;`.
 * Hence the macro can only be used where a statement sequence is permitted,
 * e.g. as the trailing initializer of a declaration statement,
 * but not within a larger expression or as the sole unbraced body of an `if`/`else`.
 */
#define jau_format_string_static(...) \
    jau::format_string(__VA_ARGS__); \
    static_assert(0 <= jau::cfmt::checkR(__VA_ARGS__).argCount()); // compile time validation!
878
879/** \example test_intdecstring01.cpp
880 * This C++ unit test validates the jau::to_decstring implementation
881 */
882
883#endif /* JAU_STRING_UTIL_HPP_ */
#define E_FILE_LINE
std::string toString() const noexcept
C++ Named Requirement Container (partial)
C++ Named Requirement ContiguousContainer (partial)
Concept of type-trait std::is_pointer.
Concept of type-trait std::is_standard_layout.
Concept of type-trait std::is_trivially_copyable.
constexpr bool is_little_endian() noexcept
Evaluates true if platform is running in little endian mode, i.e.
constexpr uint16_t bswap(uint16_t const source) noexcept
Definition byte_util.hpp:88
lb_endian_t
Simplified reduced endian type only covering little- and big-endian.
constexpr uint16_t cpu_to_le(uint16_t const h) noexcept
const uint8_t * cast_char_ptr_to_uint8(const char *s) noexcept
@ big
Identifier for big endian.
@ big
Identifier for big endian, equivalent to endian::big.
#define PRAGMA_DISABLE_WARNING_FORMAT_SECURITY
constexpr bool value(const Bool rhs) noexcept
constexpr bool has_toString_v
constexpr bool has_member_of_pointer_v
constexpr bool has_to_string_v
#define PRAGMA_DISABLE_WARNING_PUSH
constexpr std::enable_if_t< sizeof(Dest)==sizeof(Source) &&std::is_pointer_v< Source > &&std::is_pointer_v< Dest >, Dest > pointer_cast(const Source &src) noexcept
A constexpr pointer cast implementation for C++17, inspired by C++20 bit_cast<>(arg).
const jau::type_info & static_ctti() noexcept
Returns a static global reference of make_ctti<T>(true) w/ identity instance.
#define consteval_cxx20
consteval qualifier replacement for C++20 consteval.
#define PRAGMA_DISABLE_WARNING_POP
#define PRAGMA_DISABLE_WARNING_FORMAT_NONLITERAL
Bool
Boolean type without implicit conversion, safe for function parameter.
constexpr T invert_sign(const T x) noexcept
Safely inverts the sign of an arithmetic number w/ branching in O(1)
uint_fast32_t nsize_t
Natural 'size_t' alternative using uint_fast32_t as its natural sized type.
Definition int_types.hpp:55
constexpr size_t digits(const T x, const nsize_t radix) noexcept
Returns the number of digits of the given unsigned integral value number and the given radix.
Definition int_math.hpp:476
constexpr size_t digits10(const T x, const snsize_t x_sign, const bool sign_is_digit=true) noexcept
Returns the number of decimal digits of the given integral value number using std::log10<T>().
Definition int_math.hpp:436
int_fast32_t snsize_t
Natural 'ssize_t' alternative using int_fast32_t as its natural sized type.
Definition int_types.hpp:67
constexpr int sign(const T x) noexcept
Returns the value of the sign function (w/o branching ?) in O(1).
Definition base_math.hpp:81
std::string & toLowerInPlace(std::string &s) noexcept
std::string trim(const std::string &s) noexcept
trim copy
void trimInPlace(std::string &s) noexcept
trim in place
std::string toBitString(const void *data, const nsize_t length, const bit_order_t bitOrder=bit_order_t::msb, const PrefixOpt prefix=PrefixOpt::prefix, size_t bit_len=0) noexcept
Produce a binary string representation of the given lsb-first byte values.
constexpr PResult checkR2(const std::string_view fmt) noexcept
Strict type validation of arguments against the format string.
constexpr std::string format_string_n(const std::size_t maxStrLen, const std::string_view &format, const Args &...args)
Safely returns a (potentially truncated) string according to snprintf() formatting rules and variable...
std::string get_string(const uint8_t *buffer, nsize_t const buffer_len, nsize_t const max_len) noexcept
Returns a C++ String taken from buffer with maximum length of min(buffer_len, max_len).
constexpr std::string format_string(const std::string_view format, const Args &...args)
Safely returns a (non-truncated) string according to snprintf() formatting rules and variable number ...
std::unordered_set< std::string, string_hash, std::equal_to<> > StringHashSet
Int64SizeBoolTuple to_integer(const char *str, size_t str_len, const nsize_t radix=10, const char limiter='\0', const char *limiter_pos=nullptr)
Returns tuple [int64_t result, size_t consumed_chars, bool complete] of string to integer conversion ...
SizeBoolPair fromBitString(std::vector< uint8_t > &out, const uint8_t bitstr[], const size_t bitstr_len, const bit_order_t bitOrder=bit_order_t::msb, const Bool checkPrefix=Bool::True) noexcept
Converts a given binary string representation into a byte vector, lsb-first.
std::unordered_map< std::string, T, string_hash, std::equal_to<> > StringHashMap
std::vector< std::string > split_string(const std::string &str, const std::string &separator) noexcept
Split given string str at separator into the resulting std::vector excluding the separator sequence.
constexpr const char * HexadecimalArray
constexpr std::string format_string_h(const std::size_t strLenHint, const std::string_view format, const Args &...args)
Safely returns a (non-truncated) string according to snprintf() formatting rules and variable number ...
std::string & appendToHexString(std::string &dest, const uint8_t value, const LoUpCase capitalization=LoUpCase::lower) noexcept
Produce a hexadecimal string representation of the given byte value and appends it to the given strin...
bool is_ascii_code(int c) noexcept
std::string toLower(const std::string &s) noexcept
SizeBoolPair fromHexString(std::vector< uint8_t > &out, const uint8_t hexstr[], const size_t hexstr_len, const lb_endian_t byteOrder=lb_endian_t::big, const Bool checkPrefix=Bool::True) noexcept
Converts a given hexadecimal string representation into a byte vector, lsb-first.
std::string toHexString(const void *data, const nsize_t length, const lb_endian_t byteOrder=lb_endian_t::big, const LoUpCase capitalization=LoUpCase::lower, const PrefixOpt prefix=PrefixOpt::prefix) noexcept
Produce a hexadecimal string representation of the given lsb-first byte values.
std::string to_decstring(const value_type &v, const char separator=',', const nsize_t width=0) noexcept
Produce a decimal string representation of an integral integer value.
constexpr std::string format_string_n(const std::size_t maxStrLen, const std::string_view &format, const Args &...args)
constexpr std::string format_string_h(const std::size_t strLenHint, const std::string_view format, const Args &...args)
__pack(...): Produces MSVC, clang and gcc compatible lead-in and -out macros.
Definition backtrace.hpp:32
bit_order_t
Bit order type, i.e.
@ msb
Identifier for most-significant-bit (msb) first.
std::string to_string(const bit_order_t v) noexcept
Return std::string representation of the given bit_order_t.
STL namespace.
Simple pre-defined value tuple [int64_t, size_t, bool] for structured bindings to multi-values.
Simple pre-defined value pair [size_t, bool] for structured bindings to multi-values.
constexpr ssize_t argCount() const noexcept
std::string toString() const
C++20: Heterogeneous Lookup in (Un)ordered Containers.
size_t operator()(const std::string &txt) const
size_t operator()(const char *txt) const
size_t operator()(std::string_view txt) const
uint_fast32_t nsize_t
Natural 'size_t' alternative using uint_fast32_t as its natural sized type.
Definition int_types.hpp:55