jaulib v1.4.1-14-g15926ba
Jau Support Library (C++, Java, ..)
Loading...
Searching...
No Matches
string_util.hpp
Go to the documentation of this file.
1/*
2 * Author: Sven Gothel <sgothel@jausoft.com>
3 * Copyright (c) 2021 Gothel Software e.K.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be
14 * included in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef JAU_STRING_UTIL_HPP_
26#define JAU_STRING_UTIL_HPP_
27
28#include <algorithm>
29#include <concepts>
30#include <cstdarg>
31#include <cstdint>
32#include <cstring>
33#include <limits>
34#include <string>
35#include <string_view>
36#include <type_traits>
37#include <vector>
38
39#include "jau/type_info.hpp"
40
41#include <jau/byte_util.hpp>
42#include <jau/cpp_lang_util.hpp>
44
45#include <jau/int_math.hpp>
46#include <jau/int_types.hpp>
48
50#include <jau/type_concepts.hpp>
51
52namespace jau {
53
54 /** @defgroup StringUtils String Utilities
55 * String utilities for type conversion and manipulation.
56 *
57 * @{
58 */
59
/**
 * Returns true if `c` is classified as a control character by std::iscntrl()
 * or as a printable character by std::isprint().
 *
 * Values that are not representable as `unsigned char` (this includes any
 * negative value such as EOF) yield false.
 *
 * @param c the character code to classify
 * @return true if `c` is a control or printable character, otherwise false
 */
inline bool is_ascii_code(int c) noexcept {
    // The <cctype> classifiers have undefined behavior for arguments that are
    // neither EOF nor representable as unsigned char, hence reject them up front.
    if ( c < 0 || c > static_cast<int>(std::numeric_limits<unsigned char>::max()) ) {
        return false;
    }
    return 0 != std::iscntrl(c) || 0 != std::isprint(c);
}
63
/** Returns true if `c` is one of the decimal digit characters '0' through '9'. */
constexpr bool is_digit(char c) noexcept {
    return !( c < '0' || '9' < c );
}
65
66 /**
67 * Returns a C++ String taken from buffer with maximum length of min(buffer_len, max_len).
68 * <p>
69 * The maximum length only delimits the string length and does not contain the EOS null byte.
70 * An EOS null byte will be added.
71 * </p>
72 * <p>
73 * The source string within buffer is not required to contain an EOS null byte;
74 * </p>
75 */
76 std::string get_string(const uint8_t *buffer, nsize_t const buffer_len, nsize_t const max_len) noexcept;
77
78 /** trim in place */
79 void trimInPlace(std::string &s) noexcept;
80
81 /** trim copy */
82 std::string trim(const std::string &s) noexcept;
83
84 /** Split given string `str` at `separator` into the resulting std::vector excluding the separator sequence . */
85 std::vector<std::string> split_string(const std::string &str, const std::string &separator) noexcept;
86
87 std::string &toLowerInPlace(std::string &s) noexcept;
88
89 std::string toLower(const std::string &s) noexcept;
90
91 /**
92 // *************************************************
93 // *************************************************
94 // *************************************************
95 */
96
/** Letter-case selector, used as the `capitalization` argument of the hexadecimal and radix string functions in this file. */
97 enum class LoUpCase : bool {
98 lower = false, ///< selects the lower-case digit table
99 upper = true ///< selects the upper-case digit table
100 };
101
/** Prefix selector, used as the `prefix` argument of the hexadecimal, binary and radix string functions in this file. */
102 enum class PrefixOpt : bool {
103 none = false, ///< no leading radix prefix
104 prefix = true ///< leading radix prefix requested
105 };
106
107 /**
108 * Converts a given hexadecimal string representation into a byte vector, lsb-first.
109 *
110 * In case a non valid hexadecimal digit appears in the given string,
111 * conversion ends and fills the byte vector up until the violation.
112 *
113 * In case hexstr contains an odd number of hex-nibbles, it will be interpreted as follows
114 * - 0xf[12] = 0x0f12 = { 0x12, 0x0f } - msb, 1st single low-nibble is most significant
115 * - [12]f = 0xf012 = { 0x12, 0xf0 } - lsb, last single high-nibble is most significant
116 *
117 * Even if complete==false, result holds the partial value if consumed_chars>0.
118 *
119 * You may use C++17 structured bindings to handle the pair.
120 *
121 * @param out the byte vector sink, lsb-first
122 * @param hexstr the hexadecimal string representation
123 * @param hexstr_len length of hexstr
124 * @param byteOrder lb_endian_t::big for big-endian bytes in `hexstr` (default)
125 * @param checkPrefix if True, checks for a leading `0x` and removes it, otherwise not.
126 * @return pair [size_t consumed_chars, bool complete], i.e. consumed characters of string and completed=false if not fully consumed.
127 */
128 SizeBoolPair fromHexString(std::vector<uint8_t> &out, const uint8_t hexstr[], const size_t hexstr_len,
129 const lb_endian_t byteOrder = lb_endian_t::big, const Bool checkPrefix = Bool::True) noexcept;
130
131 /** See hexStringBytes() */
132 inline SizeBoolPair fromHexString(std::vector<uint8_t> &out, const std::string_view hexstr,
133 const lb_endian_t byteOrder = lb_endian_t::big, const Bool checkPrefix = Bool::True) noexcept {
134 return jau::fromHexString(out, cast_char_ptr_to_uint8(hexstr.data()), hexstr.length(), byteOrder, checkPrefix); // NOLINT(bugprone-suspicious-stringview-data-usage)
135 }
136
137 /**
138 * Converts a given hexadecimal string representation into a uint64_t value according to hexStringBytes().
139 *
140 * Even if complete==false, result holds the partial value if consumed_chars>0.
141 *
142 * You may use C++17 structured bindings to handle the tuple.
143 *
144 * @param hexstr the hexadecimal string representation
145 * @param byteOrder lb_endian_t::big for big-endian bytes in `hexstr` (default)
146 * @param checkPrefix if True, checks for a leading `0x` and removes it, otherwise not.
147 * @return tuple [uint64_t result, size_t consumed_chars, bool complete], i.e. consumed characters of string and completed=false if not fully consumed.
148 * @see hexStringBytes()
149 * @see to_hexstring()
150 */
151 UInt64SizeBoolTuple fromHexString(std::string_view const hexstr, const lb_endian_t byteOrder = lb_endian_t::big,
152 const Bool checkPrefix = Bool::True) noexcept;
153
/** Lower-case hexadecimal digit characters, ordered by digit value 0 through 15. */
154 inline constexpr const char *HexadecimalArrayLow = "0123456789abcdef";
/** Upper-case hexadecimal digit characters, ordered by digit value 0 through 15. */
155 inline constexpr const char *HexadecimalArrayBig = "0123456789ABCDEF";
156
157 /**
158 * Produce a hexadecimal string representation of the given lsb-first byte values.
159 *
160 * If byteOrder is lb_endian_t::little, orders lsb-byte left, usual for byte streams. Result will not have a leading `0x`.
161 * Otherwise, lb_endian_t::big (default), orders msb-byte left for integer values. Result will have a leading `0x` if !skipPrefix.
162 *
163 * @param data pointer to the first byte to print, lsb-first
164 * @param length number of bytes to print
165 * @param byteOrder lb_endian_t::big for big-endian bytes in resulting hex-string (default).
166 * A leading `0x` will be prepended if `byteOrder == lb_endian_t::big` and `PrefixOpt::prefix` given.
167 * @param capitalization LoUpCase capitalization, default is LoUpCase::lower
168 * @param prefix pass PrefixOpt::prefix (default) to add leading `0x` if `byteOrder == lb_endian_t::big` (default)
169 * @return the hex-string representation of the data
170 */
171 std::string toHexString(const void *data, const nsize_t length,
172 const lb_endian_t byteOrder = lb_endian_t::big, const LoUpCase capitalization = LoUpCase::lower,
173 const PrefixOpt prefix = PrefixOpt::prefix) noexcept;
174
175 /**
176 * Produce a hexadecimal string representation of the given byte value and appends it to the given string
177 * @param dest the std::string reference destination to append
178 * @param value the byte value to represent
179 * @param capitalization LoUpCase capitalization, default is LoUpCase::lower
180 * @return the given std::string reference for chaining
181 */
182 std::string &appendToHexString(std::string &dest, const uint8_t value, const LoUpCase capitalization = LoUpCase::lower) noexcept;
183
184 /**
185 * Produce a lower-case hexadecimal string representation with leading `0x` in MSB of the given pointer.
186 * @tparam value_type a pointer type
187 * @param v the pointer of given pointer type
188 * @param byteOrder lb_endian_t::big for big-endian bytes in resulting hex-string (default).
189 * A leading `0x` will be prepended if `byteOrder == lb_endian_t::big` and `PrefixOpt::prefix` given.
190 * @param capitalization LoUpCase capitalization, default is LoUpCase::lower
191 * @param prefix pass PrefixOpt::prefix (default) to add leading `0x` if `byteOrder == lb_endian_t::big` (default)
192 * @return the hex-string representation of the value
193 * @see bytesHexString()
194 * @see from_hexstring()
195 */
196 template<class value_type>
// NOTE(review): listing lines 197-198 are absent here; presumably a constraint limiting value_type to pointer types (see @tparam above) - confirm against the original header.
199 inline std::string toHexString(value_type const &v, const lb_endian_t byteOrder = lb_endian_t::big,
200 const LoUpCase capitalization = LoUpCase::lower,
201 const PrefixOpt prefix = PrefixOpt::prefix) noexcept
202 {
203#if defined(__EMSCRIPTEN__) // jau::os::is_generic_wasm()
204 static_assert(is_little_endian()); // Bug in emscripten, unable to deduce uint16_t, uint32_t or uint64_t override of cpu_to_le() or bswap()
// Little-endian is asserted above, hence the pointer's integer value is used without cpu_to_le().
205 const uintptr_t v_le = reinterpret_cast<uintptr_t>(v);
206 return toHexString(pointer_cast<const uint8_t *>(&v_le), sizeof(v), // NOLINT(bugprone-sizeof-expression): Intended
207 byteOrder, capitalization, prefix);
208#else
// Pass the pointer's integer value through cpu_to_le() before handing its bytes to the byte-wise overload, which expects lsb-first data.
209 const uintptr_t v_le = jau::cpu_to_le(reinterpret_cast<uintptr_t>(v));
210 return toHexString(pointer_cast<const uint8_t *>(&v_le), sizeof(v), // NOLINT(bugprone-sizeof-expression): Intended
211 byteOrder, capitalization, prefix);
212#endif
213 }
214
215 /**
216 * Produce a lower-case hexadecimal string representation with leading `0x` in MSB of the given uint8_t continuous container values.
217 * @tparam uint8_container_type a uint8_t continuous container type
218 * @param bytes the value of given uint8_t continuous container type
219 * @param byteOrder lb_endian_t::big for big-endian bytes in resulting hex-string (default).
220 * A leading `0x` will be prepended if `byteOrder == lb_endian_t::big` and `PrefixOpt::prefix` given.
221 * @param capitalization LoUpCase capitalization, default is LoUpCase::lower
222 * @param prefix pass PrefixOpt::prefix (default) to add leading `0x` if `byteOrder == lb_endian_t::big` (default)
223 * @return the hex-string representation of the value
224 * @see bytesHexString()
225 * @see from_hexstring()
226 */
227 template<class uint8_container_type>
229 std::convertible_to<typename uint8_container_type::value_type, uint8_t>
230 inline std::string toHexString(const uint8_container_type &bytes,
231 const lb_endian_t byteOrder = lb_endian_t::big, const LoUpCase capitalization = LoUpCase::lower,
232 const PrefixOpt skipPrefix = PrefixOpt::prefix) noexcept
233 {
234 return toHexString((const uint8_t *)bytes.data(), bytes.size(), byteOrder, capitalization, skipPrefix);
235 }
236
237 /**
238 * Produce a lower-case hexadecimal string representation with leading `0x` in MSB of the given value with standard layout.
239 * @tparam value_type a standard layout value type
240 * @param v the value of given standard layout type
241 * @param byteOrder lb_endian_t::big for big-endian bytes in resulting hex-string (default).
242 * A leading `0x` will be prepended if `byteOrder == lb_endian_t::big` and `PrefixOpt::prefix` given.
243 * @param capitalization LoUpCase capitalization, default is LoUpCase::lower
244 * @param prefix pass PrefixOpt::prefix (default) to add leading `0x` if `byteOrder == lb_endian_t::big` (default)
245 * @return the hex-string representation of the value
246 * @see bytesHexString()
247 * @see from_hexstring()
248 */
249 template<class value_type>
// NOTE(review): listing lines 250-253 are absent here; presumably the constraints restricting value_type to standard layout values (see @tparam above) - confirm against the original header.
254 inline std::string toHexString(value_type const &v, const lb_endian_t byteOrder = lb_endian_t::big,
255 const LoUpCase capitalization = LoUpCase::lower,
256 const PrefixOpt prefix = PrefixOpt::prefix) noexcept {
257 if constexpr ( is_little_endian() ) {
// Little-endian case: the bytes of `v` are passed to the byte-wise overload as they are.
258 return toHexString(pointer_cast<const uint8_t *>(&v), sizeof(v),
259 byteOrder, capitalization, prefix);
260 } else {
// Otherwise a byte-swapped copy is made first and its bytes are passed instead.
261 const value_type v_le = jau::bswap(v);
262 return toHexString(pointer_cast<const uint8_t *>(&v_le), sizeof(v),
263 byteOrder, capitalization, prefix);
264 }
265 }
266
267 /**
268 // *************************************************
269 // *************************************************
270 // *************************************************
271 */
272
273 /**
274 * Converts a given binary string representation into a byte vector, lsb-first.
275 *
276 * In case a non valid binary digit appears in the given string,
277 * conversion ends and fills the byte vector up until the violation.
278 *
279 * In case bitstr contains an incomplete number of bit-nibbles, it will be interpreted as follows
280 * - 0b11[00000001] = 0x0301 = { 0x01, 0x03 } - msb, 1st single low-nibble is most significant
281 * - 0b[01000000]11 = 0xC040 = { 0x40, 0xC0 } - lsb, last single high-nibble is most significant
282 * - 11 -> 11000000 -> C0
283 *
284 * Even if complete==false, result holds the partial value if consumed_chars>0.
285 *
286 * You may use C++17 structured bindings to handle the pair.
287 *
288 * @param out the byte vector sink, lsb-first
289 * @param bitstr the binary string representation
290 * @param bitstr_len length of bitstr
291 * @param bitOrder bit_order_t::msb for most significant bits in `bitstr` first (default)
292 * @param checkPrefix if True, checks for a leading `0b` and removes it, otherwise not.
293 * @return pair [size_t consumed_chars, bool complete], i.e. consumed characters of string and completed=false if not fully consumed.
294 */
295 SizeBoolPair fromBitString(std::vector<uint8_t> &out, const uint8_t bitstr[], const size_t bitstr_len,
296 const bit_order_t bitOrder = bit_order_t::msb, const Bool checkPrefix = Bool::True) noexcept;
297
298 /** See fromBitString() */
299 inline SizeBoolPair fromBitString(std::vector<uint8_t> &out, const std::string_view bitstr,
300 const bit_order_t bitOrder = bit_order_t::msb, const Bool checkPrefix = Bool::True) noexcept {
301 return jau::fromBitString(out, cast_char_ptr_to_uint8(bitstr.data()), bitstr.length(), bitOrder, checkPrefix); // NOLINT(bugprone-suspicious-stringview-data-usage)
302 }
303
304 /**
305 * Converts a given binary string representation into a uint64_t value according to bitStringBytes().
306 *
307 * Even if complete==false, result holds the partial value if consumed_chars>0.
308 *
309 * You may use C++17 structured bindings to handle the tuple.
310 *
311 * @param bitstr the binary string representation
312 * @param checkPrefix if true, checks for a leading `0b` and removes it, otherwise not.
313 * @param bitOrder bit_order_t::msb for most significant bits in `bitstr` first (default)
314 * @return tuple [uint64_t result, size_t consumed_chars, bool complete], i.e. consumed characters of string and completed=false if not fully consumed.
315 * @see bitStringBytes()
316 * @see to_bitstring()
317 */
318 UInt64SizeBoolTuple fromBitString(std::string_view const bitstr, const bit_order_t bitOrder = bit_order_t::msb, const Bool checkPrefix = Bool::True) noexcept;
319
320 /**
321 * Produce a binary string representation of the given lsb-first byte values.
322 *
323 * If byteOrder is lb_endian_t::little, orders lsb-byte left, usual for byte streams. Result will not have a leading `0b`.
324 * Otherwise, lb_endian_t::big (default), orders msb-byte left for integer values. Result will have a leading `0b` if !skipPrefix.
325 *
326 * @param data pointer to the first byte to print, lsb-first
327 * @param length number of bytes to print
328 * @param bitOrder bit_order_t::msb for most-significant-bit first in resulting bit-string, bit_order_t::msb is default
329 * A leading `0b` will be prepended if `bitOrder == bit_order_t::msb` and `PrefixOpt::prefix` given.
330 * @param prefix pass PrefixOpt::prefix (default) to add leading `0b` if `bitOrder == bit_order_t::msb` (default)
331 * @param bit_len optional fixed number of bits to be printed counting from lsb excluding prefix. Pass zero for dropping zero leading bytes (default).
332 * @return the bit-string representation of the data
333 */
334 std::string toBitString(const void *data, const nsize_t length,
336 size_t bit_len=0) noexcept;
337
338 /**
339 * Produce a binary string representation with leading `0b` in MSB of the given uint8_t continuous container values.
340 * @tparam uint8_container_type a uint8_t continuous container type
341 * @param bytes the value of given uint8_t continuous container type
342 * @param bitOrder bit_order_t::msb for most-significant-bit first in resulting bit-string, bit_order_t::msb is default
343 * A leading `0b` will be prepended if `bitOrder == bit_order_t::msb` and `PrefixOpt::prefix` given.
344 * @param prefix pass PrefixOpt::prefix (default) to add leading `0b` if `bitOrder == bit_order_t::msb` (default)
345 * @param bit_len optional fixed number of bits to be printed counting from lsb excluding prefix. Pass zero for dropping zero leading bytes (default).
346 * @return the bit-string representation of the value
347 * @see bytesBitString()
348 * @see from_bitstring()
349 */
350 template<class uint8_container_type>
351 requires jau::req::contiguous_container<uint8_container_type> &&
352 std::convertible_to<typename uint8_container_type::value_type, uint8_t>
353 inline std::string toBitString(const uint8_container_type &bytes,
354 const bit_order_t bitOrder = bit_order_t::msb, const PrefixOpt prefix = PrefixOpt::prefix, size_t bit_len=0) noexcept {
355 return toBitString((const uint8_t *)bytes.data(), bytes.size(), bitOrder, prefix, bit_len);
356 }
357
358 /**
359 * Produce a binary string representation with leading `0b` in MSB of the given value with standard layout.
360 * @tparam value_type a standard layout value type
361 * @param v the value of given standard layout type
362 * @param bitOrder bit_order_t::msb for most-significant-bit first in resulting bit-string, bit_order_t::msb is default
363 * A leading `0b` will be prepended if `bitOrder == bit_order_t::msb` and `PrefixOpt::prefix` given.
364 * @param prefix pass PrefixOpt::prefix (default) to add leading `0b` if `bitOrder == bit_order_t::msb` (default)
365 * @param bit_len optional fixed number of bits to be printed counting from lsb excluding prefix. Pass zero for dropping zero leading bytes (default).
366 * @return the bit-string representation of the value
367 * @see bytesBitString()
368 * @see from_bitstring()
369 */
370 template<class value_type>
// NOTE(review): listing lines 371-374 are absent here; presumably the constraints restricting value_type to standard layout values (see @tparam above) - confirm against the original header.
375 inline std::string toBitString(value_type const &v, const bit_order_t bitOrder = bit_order_t::msb,
376 const PrefixOpt prefix = PrefixOpt::prefix, size_t bit_len=0) noexcept
377 {
378 if constexpr ( is_little_endian() ) {
// Little-endian case: the bytes of `v` are passed to the byte-wise overload as they are.
379 return toBitString(pointer_cast<const uint8_t *>(&v), sizeof(v),
380 bitOrder, prefix, bit_len);
381 } else {
// Otherwise a byte-swapped copy is made first and its bytes are passed instead.
382 const value_type v_le = jau::bswap(v);
383 return toBitString(pointer_cast<const uint8_t *>(&v_le), sizeof(v),
384 bitOrder, prefix, bit_len);
385 }
386 }
387
388 /**
389 // *************************************************
390 // *************************************************
391 // *************************************************
392 */
393
394 /**
395 * Produce a decimal string representation of an integral integer value.
396 * @tparam value_type an integral integer type
397 * @param v the integral integer value
398 * @param separator if not 0, use as separation character, otherwise no separation characters are being used
399 * @param width the minimum number of characters to be printed. Add padding with blank space if result is shorter.
400 * @return the string representation of the integral integer value
401 */
402 template<class value_type,
403 std::enable_if_t<std::is_integral_v<value_type>,
404 bool> = true>
405 std::string to_decstring(const value_type &v, const char separator = ',', const nsize_t width = 0) noexcept {
406 const snsize_t v_sign = jau::sign<value_type>(v);
407 const size_t digit10_count1 = jau::digits10<value_type>(v, v_sign, true /* sign_is_digit */);
408 const size_t digit10_count2 = v_sign < 0 ? digit10_count1 - 1 : digit10_count1; // less sign
409
410 const size_t separator_count = separator ? (digit10_count1 - 1) / 3 : 0;
411 const size_t net_chars = digit10_count1 + separator_count;
412 const size_t total_chars = std::max<size_t>(width, net_chars);
413 std::string res(total_chars, ' ');
414
415 value_type n = v;
416 size_t char_iter = 0;
417
418 for ( size_t digit10_iter = 0; digit10_iter < digit10_count2 /* && char_iter < total_chars */; digit10_iter++ ) {
419 const int digit = v_sign < 0 ? invert_sign(n % 10) : n % 10;
420 n /= 10;
421 if ( separator && 0 < digit10_iter && 0 == digit10_iter % 3 ) {
422 res[total_chars - 1 - (char_iter++)] = separator;
423 }
424 res[total_chars - 1 - (char_iter++)] = '0' + digit;
425 }
426 if ( v_sign < 0 /* && char_iter < total_chars */ ) {
427 res[total_chars - 1 - (char_iter++)] = '-';
428 }
429 return res;
430 }
431
432 /**
433 // *************************************************
434 // *************************************************
435 // *************************************************
436 */
437
438 /**
439 * Appends a string representation of an integral integer value with given radix.
440 * @tparam value_type an integral integer type
441 * @param dest the std::string to append to
442 * @param val the unsigned integral integer value
443 * @param radix base of the number system, supported: 2 binary, 8 octal, 10 decimal, 16 hexadecimal
444 * @param capitalization LoUpCase capitalization, default is LoUpCase::lower
445 * @param prefix pass PrefixOpt::prefix (default) to add leading prefix for radix. Prefixes: `0x` hex, `0` octal and `0b` binary.
446 * @param min_width the minimum number of characters to be printed including prefix. Add padding with `padding` if result is shorter.
447 * @param separator separator character for each decimal 3 or other radix 4. Defaults to 0 for no separator.
448 * @param padding padding character, defaults to '0'. See 'min_width' above.
449 * @return the string representation of the unsigned integral integer value with given radix
450 */
451 template<std::integral value_type>
452 std::string& append_string(std::string &dest, value_type val, const nsize_t radix,
453 const LoUpCase capitalization = LoUpCase::lower,
455 const nsize_t min_width = 0, const char separator = 0, const char padding = '0') noexcept
456 {
457 const size_t dest_start_len = dest.size();
458 nsize_t shift;
459 switch ( radix ) {
460 case 16: shift = 4; break;
461 case 10: shift = 0; break;
462 case 8: shift = 3; break;
463 case 2: shift = 1; break;
464 default: return dest;
465 }
466 typedef std::make_unsigned_t<value_type> unsigned_value_type;
467 unsigned_value_type v = unsigned_value_type( jau::abs(val) );
468 const char *hex_array = LoUpCase::lower == capitalization ? HexadecimalArrayLow : HexadecimalArrayBig;
469 const nsize_t mask = radix - 1; // ignored for radix 10
470 const nsize_t val_digits = jau::digits<value_type>(v, radix);
471 nsize_t sign_len = 0;
472 char sign = 0;
473 if( !jau::is_positive(val) ) {
474 sign = '-';
475 ++sign_len;
476 }
477 const nsize_t prefix_len = (PrefixOpt::none == prefix || 10 == radix) ? 0 : (8 == radix ? 1 : 2);
478 const nsize_t sep_gap = 10 == radix ? 3 : 4;
479 nsize_t sep_count = 0;
480 if( val_digits > 0 && separator ) {
481 if ( '0' == padding ) {
482 // separator inside padding
483 if ( min_width > prefix_len ) {
484 const size_t len0 = std::max<size_t>(min_width - prefix_len, val_digits);
485 sep_count = (len0 - 1) / sep_gap;
486 if ( val_digits + sep_count + prefix_len > min_width ) {
487 --sep_count; // fix down
488 }
489 } else {
490 sep_count = (val_digits - 1) / sep_gap;
491 }
492 } else {
493 // separator w/o padding
494 sep_count = (val_digits - 1) / sep_gap;
495 }
496 }
497 {
498 const size_t added_len = std::max<size_t>(min_width, val_digits + sep_count + sign_len + prefix_len);
499 dest.resize(dest_start_len + added_len, ' ');
500 }
501 const char * const d_start = dest.data() + dest_start_len;
502 const char * const d_start_num = d_start + prefix_len;
503 char *d = dest.data()+dest.size();
504
505 nsize_t digit_cnt = 0, separator_idx = 0;
506 while ( d > d_start_num ) {
507 if ( separator_idx < sep_count && 0 < digit_cnt && 0 == digit_cnt % sep_gap ) {
508 *(--d) = separator;
509 ++separator_idx;
510 }
511 if ( d > d_start_num ) {
512 if (digit_cnt >= val_digits) {
513 if( !sign || ( padding == '0' && d > d_start_num+1 ) ) {
514 *(--d) = padding;
515 } else {
516 *(--d) = sign;
517 sign = 0;
518 }
519 } else if ( 10 == radix ) {
520 *(--d) = '0' + (v % 10);
521 v /= 10;
522 } else {
523 *(--d) = hex_array[v & mask];
524 v >>= shift;
525 }
526 ++digit_cnt;
527 }
528 }
529 if ( d > d_start ) {
530 switch ( radix ) { // NOLINT(bugprone-switch-missing-default-case)
531 case 16: *(--d) = 'x'; break;
532 case 8: *(--d) = '0'; break;
533 case 2: *(--d) = 'b'; break;
534 }
535 if ( d > d_start ) {
536 *(--d) = '0';
537 }
538 }
539 return dest;
540 }
541
542 /**
543 * Produce a string representation of an integral integer value with given radix.
544 * @tparam value_type an unsigned integral integer type
545 * @param v the integral integer value
546 * @param radix base of the number system, supported: 2 binary, 8 octal, 10 decimal, 16 hexadecimal
547 * @param capitalization LoUpCase capitalization, default is LoUpCase::lower
548 * @param prefix pass PrefixOpt::prefix (default) to add leading prefix for radix. Prefixes: `0x` hex, `0` octal and `0b` binary.
549 * @param min_width the minimum number of characters to be printed including prefix. Add padding with `padding` if result is shorter.
550 * @param separator separator character for each decimal 3 or other radix 4. Defaults to 0 for no separator.
551 * @param padding padding character, defaults to '0'. See 'min_width' above.
552 * @return the string representation of the unsigned integral integer value with given radix
553 */
554 template<std::integral value_type>
555 std::string to_string(value_type v, const nsize_t radix,
556 const LoUpCase capitalization = LoUpCase::lower,
558 const nsize_t min_width = 0, const char separator = 0, const char padding = '0') noexcept
559 {
560 std::string str;
561 append_string(str, v, radix, capitalization, prefix, min_width, separator, padding);
562 return str;
563 }
564
/**
 * Parses the first run of decimal digits found in `str` into a signed integral value.
 *
 * Leading non-digit characters are skipped. A `-` directly preceding the first digit
 * marks the value as negative. Parsing ends at the first non-digit after the run.
 *
 * @tparam value_type a signed integral type
 * @param result storage for the parsed value; set to zero if parsing fails
 * @param str the text to parse
 * @return true if a number has been found and fits into value_type, otherwise false
 */
template<class value_type,
         std::enable_if_t<std::is_integral_v<value_type> && std::is_signed_v<value_type>,
                          bool> = true>
constexpr bool from_chars(value_type &result, std::string_view str) noexcept {
    result = 0;

    const std::size_t len = str.size();
    std::size_t pos = 0;
    while ( pos < len && !( '0' <= str[pos] && str[pos] <= '9' ) ) { ++pos; }
    if ( pos == len ) {
        return false; // no number
    }
    const bool negative = pos > 0 && '-' == str[pos - 1];

    constexpr value_type vmax = std::numeric_limits<value_type>::max();
    constexpr value_type vmin = std::numeric_limits<value_type>::min();
    value_type acc = 0;
    // Accumulate most significant digit first, validating the range before each step,
    // so neither the accumulator nor an intermediate product can overflow.
    for ( ; pos < len && '0' <= str[pos] && str[pos] <= '9'; ++pos ) {
        const value_type digit = static_cast<value_type>(str[pos] - '0');
        if ( negative ) {
            if ( acc < ( vmin + digit ) / 10 ) {
                return false; // underflow
            }
            acc = static_cast<value_type>(acc * 10 - digit);
        } else {
            if ( acc > ( vmax - digit ) / 10 ) {
                return false; // overflow
            }
            acc = static_cast<value_type>(acc * 10 + digit);
        }
    }
    result = acc;
    return true;
}

/**
 * Parses the first run of decimal digits found in `str` into an unsigned integral value.
 *
 * Leading non-digit characters are skipped. A `-` directly preceding the first digit
 * is rejected. Parsing ends at the first non-digit after the run.
 *
 * @tparam value_type an unsigned integral type
 * @param result storage for the parsed value; set to zero if parsing fails
 * @param str the text to parse
 * @return true if a non-negative number has been found and fits into value_type, otherwise false
 */
template<class value_type,
         std::enable_if_t<std::is_integral_v<value_type> && !std::is_signed_v<value_type>,
                          bool> = true>
constexpr bool from_chars(value_type &result, std::string_view str) noexcept {
    result = 0;

    const std::size_t len = str.size();
    std::size_t pos = 0;
    while ( pos < len && !( '0' <= str[pos] && str[pos] <= '9' ) ) { ++pos; }
    if ( pos == len ) {
        return false; // no number
    }
    if ( pos > 0 && '-' == str[pos - 1] ) {
        return false; // only for unsigned
    }

    constexpr value_type vmax = std::numeric_limits<value_type>::max();
    value_type acc = 0;
    // Accumulate most significant digit first, validating the range before each step,
    // so a wrapped-around value is never silently accepted.
    for ( ; pos < len && '0' <= str[pos] && str[pos] <= '9'; ++pos ) {
        const value_type digit = static_cast<value_type>(str[pos] - '0');
        if ( acc > ( vmax - digit ) / 10 ) {
            return false; // overflow
        }
        acc = static_cast<value_type>(acc * 10 + digit);
    }
    result = acc;
    return true;
}
634
635
636 /**
637 // *************************************************
638 // *************************************************
639 // *************************************************
640 */
641
642 namespace unsafe {
643 /**
644 * Returns a (potentially truncated) string according to `snprintf()` formatting rules
645 * and variable number of arguments following the `format` argument
646 * while utilizing the unsafe `vsnprintf`.
647 *
648 * This variant doesn't validate `format` against given arguments, see jau::format_string_n.
649 *
650 * Resulting string is truncated to `min(maxStrLen, formatLen)`,
651 * with `formatLen` being the given formatted string length of output w/o limitation.
652 *
653 * @param maxStrLen maximum resulting string length
654 * @param format `printf()` compliant format string
655 * @param args optional arguments matching the format string
656 */
657 std::string format_string_n(const std::size_t maxStrLen, const char* format, ...) noexcept;
658 std::string vformat_string_n(const std::size_t maxStrLen, const char* format, va_list args) noexcept;
659
660 /**
661 * Returns a (non-truncated) string according to `snprintf()` formatting rules
662 * and variable number of arguments following the `format` argument
663 * while utilizing the unsafe `vsnprintf`.
664 *
665 * This variant doesn't validate `format` against given arguments, see jau::format_string_h.
666 *
667 * Resulting string size matches formatted output w/o limitation.
668 *
669 * @param strLenHint initially used string length w/o EOS
670 * @param format `printf()` compliant format string
671 * @param args optional arguments matching the format string
672 */
673 std::string format_string_h(const std::size_t strLenHint, const char* format, ...) noexcept;
674 std::string vformat_string_h(const std::size_t strLenHint, const char* format, va_list args) noexcept;
675
676 /**
677 * Returns a (non-truncated) string according to `snprintf()` formatting rules
678 * and variable number of arguments following the `format` argument
679 * while utilizing the unsafe `vsnprintf`.
680 *
681 * This variant doesn't validate `format` against given arguments, see jau::format_string.
682 *
683 * Resulting string size matches formatted output w/o limitation.
684 *
685 * @param format `printf()` compliant format string
686 * @param args optional arguments matching the format string
687 */
688 std::string format_string(const char* format, ...) noexcept;
689
// NOTE(review): declaration only, no documentation visible here; presumably prints `msg` plus the `printf()` style formatted text to `out`, optionally adding errno and a backtrace - confirm against the implementation.
690 void errPrint(FILE *out, const char *msg, bool addErrno, bool addBacktrace, const char *func, const char *file, const int line,
691 const char* format, ...) noexcept;
692
693 } // namespace unsafe
694
695
696 /**
697 // *************************************************
698 // *************************************************
699 // *************************************************
700 */
/** Returns a std::string copy of the null-terminated text held in the character array `ref`. */
template<typename CharT, std::size_t N>
constexpr std::string to_string(const CharT (&ref)[N]) {
    return std::string(static_cast<const CharT *>(ref));
}
705
/** Returns a std::string constructed from `ref`; only accepted if value_type is exactly jau::StringLiteral<value_type::size>. */
706 template<class value_type>
707 requires std::is_same_v<jau::StringLiteral<value_type::size>, value_type> // jau::req::string_alike<value_type>
708 constexpr std::string to_string(const value_type &ref) {
709 return std::string(ref);
710 }
711
/** Returns the std::to_string() representation of an arithmetic value; `bool` is excluded and handled by its own overload. */
template<class value_type,
         std::enable_if_t<std::is_floating_point_v<value_type> ||
                          (!std::is_same_v<bool, std::remove_cv_t<value_type>> && std::is_integral_v<value_type>),
                          bool> = true>
inline std::string to_string(const value_type &ref) {
    return std::to_string(ref);
}
719
/**
 * Converts a boolean into a single letter string.
 *
 * @param ref the boolean to convert
 * @return `"T"` if `ref` is true, otherwise `"F"`
 */
template<class value_type,
         std::enable_if_t<std::is_same_v<bool, std::remove_cv_t<value_type>>,
                          bool> = true>
inline std::string to_string(const value_type &ref) {
    if ( ref ) {
        return "T";
    }
    return "F";
}
726
/**
 * Identity conversion for std::string and types derived from it.
 *
 * @param ref the string (or derived) instance
 * @return a std::string copy of `ref`
 */
template<class value_type,
         std::enable_if_t<std::is_base_of_v<std::string, value_type> &&
                          !std::is_arithmetic_v<value_type>,
                          bool> = true>
inline std::string to_string(const value_type &ref) {
    const std::string &as_string = ref;
    return as_string;
}
735
/**
 * Converts a std::string_view (or a type derived from it) into an owning std::string.
 *
 * @param ref the view to copy
 * @return std::string holding a copy of the viewed characters
 */
template<class value_type,
         std::enable_if_t<std::is_base_of_v<std::string_view, value_type> &&
                          !( std::is_arithmetic_v<value_type> ||
                             std::is_base_of_v<std::string, value_type> ),
                          bool> = true>
inline std::string to_string(const value_type &ref) {
    const std::string_view view(ref);
    return std::string(view);
}
745
746 template<class value_type,
747 std::enable_if_t<!std::is_integral_v<value_type> &&
748 !std::is_floating_point_v<value_type> &&
749 !std::is_base_of_v<std::string, value_type> &&
750 !std::is_base_of_v<std::string_view, value_type> &&
751 std::is_same_v<char*, jau::req::base_pointer<value_type>>,
752 bool> = true>
753 inline std::string to_string(const value_type &ref) {
754 return std::string(ref);
755 }
756
757 template<class value_type,
758 std::enable_if_t<!std::is_integral_v<value_type> &&
759 !std::is_floating_point_v<value_type> &&
760 !std::is_base_of_v<std::string, value_type> &&
761 !std::is_base_of_v<std::string_view, value_type> &&
762 !std::is_same_v<char*, jau::req::base_pointer<value_type>> &&
763 std::is_pointer_v<value_type>,
764 bool> = true>
765 inline std::string to_string(const value_type &ref) {
766 return toHexString((void *)ref); // NOLINT(bugprone-multi-level-implicit-pointer-conversion)
767 }
768
769 template<class value_type,
770 std::enable_if_t<!std::is_integral_v<value_type> &&
771 !std::is_floating_point_v<value_type> &&
772 !std::is_base_of_v<std::string, value_type> &&
773 !std::is_base_of_v<std::string_view, value_type> &&
774 !std::is_pointer_v<value_type> &&
776 bool> = true>
777 inline std::string to_string(const value_type &ref) {
778 return ref.toString();
779 }
780
781 template<class value_type,
782 std::enable_if_t<!std::is_integral_v<value_type> &&
783 !std::is_floating_point_v<value_type> &&
784 !std::is_base_of_v<std::string, value_type> &&
785 !std::is_base_of_v<std::string_view, value_type> &&
786 !std::is_pointer_v<value_type> &&
789 bool> = true>
790 inline std::string to_string(const value_type &ref) {
791 return ref.to_string();
792 }
793
794 template<class value_type,
795 std::enable_if_t<!std::is_integral_v<value_type> &&
796 !std::is_floating_point_v<value_type> &&
797 !std::is_base_of_v<std::string, value_type> &&
798 !std::is_base_of_v<std::string_view, value_type> &&
799 !std::is_pointer_v<value_type> &&
803 bool> = true>
804 inline std::string to_string(const value_type &ref) {
805 return toHexString((void *)ref.operator->());
806 }
807
808 template<class value_type,
809 std::enable_if_t<!std::is_integral_v<value_type> &&
810 !std::is_floating_point_v<value_type> &&
811 !std::is_base_of_v<std::string, value_type> &&
812 !std::is_base_of_v<std::string_view, value_type> &&
813 !std::is_pointer_v<value_type> &&
817 bool> = true>
818 inline std::string to_string(const value_type &ref) {
819 (void)ref;
820 return "jau::to_string<T> n/a for type " + jau::static_ctti<value_type>().toString();
821 }
822
/**
 * Joins the string representation of all elements of the given vector,
 * separated by `delim`.
 *
 * Each element is converted via an unqualified `to_string(element)` call.
 *
 * @tparam T element type
 * @param list the elements to convert
 * @param delim separator placed between two consecutive elements
 * @return the joined string, empty if `list` is empty
 */
template<typename T>
std::string to_string(std::vector<T> const &list, const std::string &delim) {
    std::string joined;
    auto it = list.cbegin();
    const auto last = list.cend();
    if ( it == last ) {
        return joined;
    }
    {
        const T &head = *it;
        joined.append(to_string(head));
    }
    for ( ++it; it != last; ++it ) {
        const T &elem = *it;
        joined.append(delim);
        joined.append(to_string(elem));
    }
    return joined;
}
/**
 * Joins the string representation of all elements of the given vector,
 * using `", "` as the separator.
 *
 * @tparam T element type
 * @param list the elements to convert
 * @return result of the delimiter taking overload called with `", "`
 */
template<typename T>
std::string to_string(std::vector<T> const &list) {
    return to_string<T>(list, ", ");
}
841
842 template<typename T>
843 std::string to_string(std::vector<T> const &list, const std::string &delim, const nsize_t radix) {
844 if ( list.empty() ) {
845 return std::string();
846 }
847 bool need_delim = false;
848 std::string res;
849 for ( const T &e : list ) {
850 if ( need_delim ) {
851 res.append(delim);
852 }
853 res.append(to_string(e, radix));
854 need_delim = true;
855 }
856 return res;
857 }
858 template<typename T>
859 std::string to_string(std::vector<T> const &list, const nsize_t radix) { return to_string<T>(list, ", ", radix); }
860
861 /**
862 * Returns tuple [int64_t result, size_t consumed_chars, bool complete] of string to integer conversion via `std::strtoll`.
863 *
864 * Even if complete==false, result holds the partial value if consumed_chars>0.
865 *
866 * You may use C++17 structured bindings to handle the tuple.
 *
 * @param str character sequence to convert
 * @param str_len length passed along with `str`; NOTE(review): presumably the number of characters to consider - confirm
 * @param radix defaults to 10; NOTE(review): presumably the numeric base handed to `std::strtoll` - confirm
 * @param limiter defaults to `'\0'`; NOTE(review): presumably the character allowed to terminate the number - confirm
 * @param limiter_pos defaults to `nullptr`; NOTE(review): semantics not visible here - confirm against the implementation
867 */
868 Int64SizeBoolTuple to_integer(const char *str, size_t str_len, const nsize_t radix = 10, const char limiter = '\0', const char *limiter_pos = nullptr) noexcept;
869
870
871 /**
872 * Returns tuple [int64_t result, size_t consumed_chars, bool complete] of string to integer conversion via `std::strtoll`.
873 *
874 * Even if complete==false, result holds the partial value if consumed_chars>0.
875 *
876 * You may use C++17 structured bindings to handle the tuple.
877 */
878 inline Int64SizeBoolTuple to_integer(const std::string_view str, const nsize_t radix = 10, const char limiter = '\0', const char *limiter_pos = nullptr) noexcept {
879 return to_integer(str.data(), str.length(), radix, limiter, limiter_pos);
880 }
881
882 /**@}*/
883
884} // namespace jau
885
886/** \example test_intdecstring01.cpp
887 * This C++ unit test validates the jau::to_decstring implementation
888 */
889
890#endif /* JAU_STRING_UTIL_HPP_ */
std::string toString() const noexcept
C++ Named Requirement Container (partial)
C++ Named Requirement ContiguousContainer (partial)
Concept of type-trait std::is_pointer.
Concept of type-trait std::is_signed and std::is_integral.
Concept of type-trait std::is_standard_layout.
Concept of type-trait std::is_trivially_copyable.
Concept of type-trait std::is_unsigned and std::is_integral.
constexpr bool is_little_endian() noexcept
Evaluates true if platform is running in little endian mode, i.e.
bit_order_t
Bit order type, i.e.
constexpr uint16_t bswap(uint16_t const source) noexcept
Definition byte_util.hpp:88
lb_endian_t
Simplified reduced endian type only covering little- and big-endian.
std::string to_string(const endian_t v) noexcept
Return std::string representation of the given endian.
constexpr uint16_t cpu_to_le(uint16_t const h) noexcept
const uint8_t * cast_char_ptr_to_uint8(const char *s) noexcept
@ msb
Identifier for most-significant-bit (msb) first.
@ big
Identifier for big endian.
@ big
Identifier for big endian, equivalent to endian::big.
constexpr bool value(const Bool rhs) noexcept
constexpr bool has_toString_v
constexpr bool has_member_of_pointer_v
constexpr bool has_to_string_v
constexpr std::enable_if_t< sizeof(Dest)==sizeof(Source) &&std::is_pointer_v< Source > &&std::is_pointer_v< Dest >, Dest > pointer_cast(const Source &src) noexcept
A constexpr pointer cast implementation for C++17, inspired by C++20 bit_cast<>(arg).
const jau::type_info & static_ctti() noexcept
Returns a static global reference of make_ctti<T>(true) w/ identity instance.
Bool
Boolean type without implicit conversion, safe for function parameter.
sint_bytes_t< sizeof(long int)> snsize_t
Natural 'ssize_t' alternative using int<XX>_t with xx = sizeof(long int)*8 as its natural sized type,...
Definition int_types.hpp:97
constexpr nsize_t digits(const T x, const nsize_t radix) noexcept
Returns the number of digits of the given unsigned integral value number and the given radix.
Definition int_math.hpp:476
constexpr T invert_sign(const T x) noexcept
Safely inverts the sign of an arithmetic number w/ branching in O(1)
constexpr nsize_t digits10(const T x, const snsize_t x_sign, const bool sign_is_digit=true) noexcept
Returns the number of decimal digits of the given integral value number using std::log10<T>().
Definition int_math.hpp:436
constexpr int sign(const T x) noexcept
Returns the value of the sign function (w/o branching ?) in O(1).
uint_bytes_t< sizeof(unsigned long int)> nsize_t
Natural 'size_t' alternative using uint<XX>_t with xx = sizeof(unsigned long int)*8 as its natural si...
Definition int_types.hpp:85
constexpr bool is_positive(const T a) noexcept
Returns true if the given integral is positive, i.e.
Definition base_math.hpp:69
constexpr T abs(const T x) noexcept
Returns the absolute value of an arithmetic number (w/ branching) in O(1)
std::string & toLowerInPlace(std::string &s) noexcept
constexpr bool from_chars(value_type &result, std::string_view str) noexcept
std::string trim(const std::string &s) noexcept
trim copy
std::string & append_string(std::string &dest, value_type val, const nsize_t radix, const LoUpCase capitalization=LoUpCase::lower, const PrefixOpt prefix=PrefixOpt::prefix, const nsize_t min_width=0, const char separator=0, const char padding='0') noexcept
Appends a string representation of an integral integer value with given radix.
constexpr bool is_digit(char c) noexcept
void trimInPlace(std::string &s) noexcept
trim in place
std::string toBitString(const void *data, const nsize_t length, const bit_order_t bitOrder=bit_order_t::msb, const PrefixOpt prefix=PrefixOpt::prefix, size_t bit_len=0) noexcept
Produce a binary string representation of the given lsb-first byte values.
Int64SizeBoolTuple to_integer(const char *str, size_t str_len, const nsize_t radix=10, const char limiter='\0', const char *limiter_pos=nullptr) noexcept
Returns tuple [int64_t result, size_t consumed_chars, bool complete] of string to integer conversion ...
std::string get_string(const uint8_t *buffer, nsize_t const buffer_len, nsize_t const max_len) noexcept
Returns a C++ String taken from buffer with maximum length of min(buffer_len, max_len).
constexpr const char * HexadecimalArrayBig
SizeBoolPair fromBitString(std::vector< uint8_t > &out, const uint8_t bitstr[], const size_t bitstr_len, const bit_order_t bitOrder=bit_order_t::msb, const Bool checkPrefix=Bool::True) noexcept
Converts a given binary string representation into a byte vector, lsb-first.
std::vector< std::string > split_string(const std::string &str, const std::string &separator) noexcept
Split given string str at separator into the resulting std::vector excluding the separator sequence .
std::string & appendToHexString(std::string &dest, const uint8_t value, const LoUpCase capitalization=LoUpCase::lower) noexcept
Produce a hexadecimal string representation of the given byte value and appends it to the given strin...
bool is_ascii_code(int c) noexcept
std::string toLower(const std::string &s) noexcept
SizeBoolPair fromHexString(std::vector< uint8_t > &out, const uint8_t hexstr[], const size_t hexstr_len, const lb_endian_t byteOrder=lb_endian_t::big, const Bool checkPrefix=Bool::True) noexcept
Converts a given hexadecimal string representation into a byte vector, lsb-first.
constexpr const char * HexadecimalArrayLow
std::string toHexString(const void *data, const nsize_t length, const lb_endian_t byteOrder=lb_endian_t::big, const LoUpCase capitalization=LoUpCase::lower, const PrefixOpt prefix=PrefixOpt::prefix) noexcept
Produce a hexadecimal string representation of the given lsb-first byte values.
std::string to_decstring(const value_type &v, const char separator=',', const nsize_t width=0) noexcept
Produce a decimal string representation of an integral integer value.
std::string format_string_h(const std::size_t strLenHint, const char *format,...) noexcept
Returns a (non-truncated) string according to snprintf() formatting rules and variable number of argu...
std::string vformat_string_h(const std::size_t strLenHint, const char *format, va_list args) noexcept
void errPrint(FILE *out, const char *msg, bool addErrno, bool addBacktrace, const char *func, const char *file, const int line, const char *format,...) noexcept
std::string format_string_n(const std::size_t maxStrLen, const char *format,...) noexcept
Returns a (potentially truncated) string according to snprintf() formatting rules and variable number...
std::string vformat_string_n(const std::size_t maxStrLen, const char *format, va_list args) noexcept
std::string format_string(const char *format,...) noexcept
Returns a (non-truncated) string according to snprintf() formatting rules and variable number of argu...
__pack(...): Produces MSVC, clang and gcc compatible lead-in and -out macros.
Definition backtrace.hpp:32
STL namespace.
Simple pre-defined value tuple [int64_t, size_t, bool] for structured bindings to multi-values.
Simple pre-defined value pair [size_t, bool] for structured bindings to multi-values.
uint_bytes_t< sizeof(unsigned long int)> nsize_t
Natural 'size_t' alternative using uint<XX>_t with xx = sizeof(unsigned long int)*8 as its natural si...
Definition int_types.hpp:85