jaulib v1.3.6
Jau Support Library (C++, Java, ..)
Loading...
Searching...
No Matches
string_util.hpp
Go to the documentation of this file.
1/*
2 * Author: Sven Gothel <sgothel@jausoft.com>
3 * Copyright (c) 2021 Gothel Software e.K.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be
14 * included in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef JAU_STRING_UTIL_HPP_
26#define JAU_STRING_UTIL_HPP_
27
#include <algorithm>
#include <cctype>
#include <climits>
#include <cstdarg>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <string>
#include <string_view>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <vector>
37
38#include <jau/byte_util.hpp>
39#include <jau/cpp_lang_util.hpp>
42
43#include <jau/int_types.hpp>
44#include <jau/int_math.hpp>
45#include <jau/string_cfmt.hpp>
46
47namespace jau {
48
49 /** @defgroup StringUtils String Utilities
50 * String utilities for type conversion and manipulation.
51 *
52 * @{
53 */
54
/**
 * Returns true if `c` is classified as a control or printable character
 * by std::iscntrl() or std::isprint() (both depend on the current C locale).
 *
 * Values which are not representable as `unsigned char`, including `EOF`,
 * yield false. std::iscntrl() and std::isprint() have undefined behavior for
 * such arguments (other than `EOF`), hence they are rejected up front.
 *
 * @param c the character code to test
 * @return true if `c` is a control or printable character, otherwise false
 */
inline bool is_ascii_code(int c) noexcept {
    if( c < 0 || c > UCHAR_MAX ) {
        return false; // outside the domain of the <cctype> classifiers
    }
    return 0 != std::iscntrl(c) || 0 != std::isprint(c);
}
58
59 /**
60 * Returns a C++ String taken from buffer with maximum length of min(buffer_len, max_len).
61 * <p>
62 * The maximum length only delimits the string length and does not contain the EOS null byte.
63 * An EOS null byte will be added.
64 * </p>
65 * <p>
66 * The source string within buffer is not required to contain an EOS null byte;
67 * </p>
68 */
69 std::string get_string(const uint8_t *buffer, nsize_t const buffer_len, nsize_t const max_len) noexcept;
70
71 /** trim in place */
72 void trimInPlace(std::string &s) noexcept;
73
74 /** trim copy */
75 std::string trim(const std::string &s) noexcept;
76
77 /** Split given string `str` at `separator` into the resulting std::vector excluding the separator sequence . */
78 std::vector<std::string> split_string(const std::string& str, const std::string& separator) noexcept;
79
80 std::string& toLowerInPlace(std::string& s) noexcept;
81
82 std::string toLower(const std::string& s) noexcept;
83
84 /**
85 // *************************************************
86 // *************************************************
87 // *************************************************
88 */
89
90 /**
91 * Converts a given hexadecimal string representation into a byte vector.
92 *
93 * In case a non valid hexadecimal digit appears in the given string,
94 * conversion ends and fills the byte vector up until the violation.
95 *
96 * If string is in MSB first (default w/ leading 0x) and platform jau::is_little_endian(),
97 * lsbFirst = false shall be passed.
98 *
99 * In case hexstr contains an odd number of hex-nibbles, it will be interpreted as follows
100 * - 0xf12 = 0x0f12 = { 0x12, 0x0f } - msb, 1st single low-nibble is most significant
101 * - 12f = 0xf012 = { 0x12, 0xf0 } - lsb, last single high-nibble is most significant
102 *
103 * @param out the byte vector sink
104 * @param hexstr the hexadecimal string representation
105 * @param lsbFirst low significant byte first
106 * @param checkLeading0x if true, checks for a leading `0x` and removes it, otherwise not.
107 * @return the length of the matching byte vector
108 */
109 size_t hexStringBytes(std::vector<uint8_t>& out, const std::string& hexstr, const bool lsbFirst, const bool checkLeading0x) noexcept;
110
111 /** See hexStringBytes() */
112 size_t hexStringBytes(std::vector<uint8_t>& out, const uint8_t hexstr[], const size_t hexstr_len, const bool lsbFirst, const bool checkLeading0x) noexcept;
113
114 /**
115 * Converts a given hexadecimal string representation into a uint64_t value according to hexStringBytes().
116 *
117 * If string is in MSB first (default w/ leading 0x) and platform jau::is_little_endian(),
118 * lsbFirst = false shall be passed (default).
119 *
120 * @param s the hexadecimal string representation
121 * @param lsbFirst low significant byte first
122 * @param checkLeading0x if true, checks for a leading `0x` and removes it, otherwise not.
123 * @return the uint64_t value
124 * @see hexStringBytes()
125 * @see to_hexstring()
126 */
127 uint64_t from_hexstring(std::string const & s, const bool lsbFirst=!jau::is_little_endian(), const bool checkLeading0x=true) noexcept;
128
129 /**
130 * Produce a hexadecimal string representation of the given byte values.
131 * <p>
132 * If lsbFirst is true, orders LSB left -> MSB right, usual for byte streams. Result will not have a leading `0x`.<br>
133 * Otherwise orders MSB left -> LSB right, usual for readable integer values. Result will have a leading `0x` if !skipLeading0x (default).
134 * </p>
135 * @param data pointer to the first byte to print
136 * @param length number of bytes to print
137 * @param lsbFirst true having the least significant byte printed first (lowest addressed byte to highest),
138 * otherwise have the most significant byte printed first (highest addressed byte to lowest).
139 * A leading `0x` will be prepended if `lsbFirst == false`.
140 * @param lowerCase true to use lower case hex-chars (default), otherwise capital letters are being used.
141 * @param skipLeading0x false to add leading `0x` if !lsbFirst (default), true to not add (skip).
142 * @return the hex-string representation of the data
143 */
144 std::string bytesHexString(const void* data, const nsize_t length,
145 const bool lsbFirst, const bool lowerCase=true, const bool skipLeading0x=false) noexcept;
146
/**
 * Produce a hexadecimal string representation of the bytes held by the given container.
 *
 * Forwards `bytes.data()` and `bytes.size()` to the pointer based bytesHexString() overload.
 *
 * NOTE(review): `bytes.size()` (an element count) is passed as the byte length,
 * so this presumably expects single-byte elements - confirm for wider integral element types.
 *
 * @tparam uint8_container_type container type whose `value_type` is integral and convertible to `uint8_t`
 * @param bytes the container providing `data()` and `size()`
 * @param lsbFirst passed through, see the pointer based bytesHexString()
 * @param lowerCase true to use lower case hex-chars (default), otherwise capital letters are being used.
 * @param skipLeading0x passed through, see the pointer based bytesHexString()
 * @return the hex-string representation of the container content
 */
template< class uint8_container_type,
          std::enable_if_t<std::is_integral_v<typename uint8_container_type::value_type> &&
                           std::is_convertible_v<typename uint8_container_type::value_type, uint8_t>,
                           bool> = true>
std::string bytesHexString(const uint8_container_type& bytes,
                           const bool lsbFirst, const bool lowerCase=true, const bool skipLeading0x=false) noexcept {
    const uint8_t* const first_byte = reinterpret_cast<const uint8_t*>( bytes.data() );
    return bytesHexString(first_byte, bytes.size(), lsbFirst, lowerCase, skipLeading0x);
}
155
156 /**
157 * Produce a hexadecimal string representation of the given byte value.
158 * @param dest the std::string reference destination to append
159 * @param value the byte value to represent
160 * @param lowerCase true to use lower case hex-chars, otherwise capital letters are being used.
161 * @return the given std::string reference for chaining
162 */
163 std::string& byteHexString(std::string& dest, const uint8_t value, const bool lowerCase) noexcept;
164
165 /**
166 * Produce a lower-case hexadecimal string representation with leading `0x` in MSB of the given pointer.
167 * @tparam value_type a pointer type
168 * @param v the pointer of given pointer type
169 * @param skipLeading0x false to add leading `0x` (default), true to not add (skip)..
170 * @return the hex-string representation of the value
171 * @see bytesHexString()
172 * @see from_hexstring()
173 */
174 template< class value_type,
175 std::enable_if_t<std::is_pointer_v<value_type>,
176 bool> = true>
177 inline std::string to_hexstring(value_type const & v, const bool skipLeading0x=false) noexcept
178 {
179 #if defined(__EMSCRIPTEN__) // jau::os::is_generic_wasm()
180 static_assert( is_little_endian() ); // Bug in emscripten, unable to deduce uint16_t, uint32_t or uint64_t override of cpu_to_le() or bswap()
181 const uintptr_t v_le = reinterpret_cast<uintptr_t>(v);
182 return bytesHexString(pointer_cast<const uint8_t*>(&v_le), sizeof(v), // NOLINT(bugprone-sizeof-expression): Intended
183 false /* lsbFirst */, true /* lowerCase */, skipLeading0x);
184 #else
185 const uintptr_t v_le = jau::cpu_to_le( reinterpret_cast<uintptr_t>(v) );
186 return bytesHexString(pointer_cast<const uint8_t*>(&v_le), sizeof(v), // NOLINT(bugprone-sizeof-expression): Intended
187 false /* lsbFirst */, true /* lowerCase */, skipLeading0x);
188 #endif
189 }
190
191 /**
192 * Produce a lower-case hexadecimal string representation with leading `0x` in MSB of the given value with standard layout.
193 * @tparam value_type a standard layout value type
194 * @param v the value of given standard layout type
195 * @param skipLeading0x false to add leading `0x` (default), true to not add (skip)..
196 * @return the hex-string representation of the value
197 * @see bytesHexString()
198 * @see from_hexstring()
199 */
200 template< class value_type,
201 std::enable_if_t<!std::is_pointer_v<value_type> &&
202 std::is_standard_layout_v<value_type>,
203 bool> = true>
204 inline std::string to_hexstring(value_type const & v, const bool skipLeading0x=false) noexcept
205 {
206 if constexpr( is_little_endian() ) {
207 return bytesHexString(pointer_cast<const uint8_t*>(&v), sizeof(v),
208 false /* lsbFirst */, true /* lowerCase */, skipLeading0x);
209 } else {
210 const value_type v_le = jau::bswap(v);
211 return bytesHexString(pointer_cast<const uint8_t*>(&v_le), sizeof(v),
212 false /* lsbFirst */, true /* lowerCase */, skipLeading0x);
213 }
214 }
215
216 /**
217 // *************************************************
218 // *************************************************
219 // *************************************************
220 */
221
222 /**
223 * Produce a decimal string representation of an integral integer value.
224 * @tparam T an integral integer type
225 * @param v the integral integer value
226 * @param separator if not 0, use as separation character, otherwise no separation characters are being used
227 * @param width the minimum number of characters to be printed. Add padding with blank space if result is shorter.
228 * @return the string representation of the integral integer value
229 */
230 template< class value_type,
231 std::enable_if_t< std::is_integral_v<value_type>,
232 bool> = true>
233 std::string to_decstring(const value_type& v, const char separator=',', const nsize_t width=0) noexcept {
234 const snsize_t v_sign = jau::sign<value_type>(v);
235 const nsize_t digit10_count1 = jau::digits10<value_type>(v, v_sign, true /* sign_is_digit */);
236 const nsize_t digit10_count2 = v_sign < 0 ? digit10_count1 - 1 : digit10_count1; // less sign
237
238 const nsize_t comma_count = 0 == separator ? 0 : ( digit10_count1 - 1 ) / 3;
239 const nsize_t net_chars = digit10_count1 + comma_count;
240 const nsize_t total_chars = std::max<nsize_t>(width, net_chars);
241 std::string res(total_chars, ' ');
242
243 value_type n = v;
244 nsize_t char_iter = 0;
245
246 for(nsize_t digit10_iter = 0; digit10_iter < digit10_count2 /* && char_iter < total_chars */; digit10_iter++ ) {
247 const int digit = v_sign < 0 ? invert_sign( n % 10 ) : n % 10;
248 n /= 10;
249 if( 0 < digit10_iter && 0 == digit10_iter % 3 ) {
250 res[total_chars-1-(char_iter++)] = separator;
251 }
252 res[total_chars-1-(char_iter++)] = '0' + digit;
253 }
254 if( v_sign < 0 /* && char_iter < total_chars */ ) {
255 res[total_chars-1-(char_iter++)] = '-';
256 }
257 return res;
258 }
259
260 /**
261 // *************************************************
262 // *************************************************
263 // *************************************************
264 */
265
266 /**
267 * Returns a string according to `vprintf()` formatting rules
268 * using `va_list` instead of a variable number of arguments.
269 * @param format `printf()` compliant format string
270 * @param ap `va_list` arguments
271 */
272 std::string vformat_string(const char* format, va_list ap);
273
274 /**
275 * Returns a string according to `printf()` formatting rules
276 * and variable number of arguments following the `format` argument.
277 * @param format `printf()` compliant format string
278 */
279 std::string format_string(const char* format, ...);
280
281 /**
282 * Safely returns a string according to `printf()` formatting rules
283 * and variable number of arguments following the `format` argument.
284 *
285 * jau::cfmt2::check() is utilize to pre-validate the given arguments
286 * against the format string. If invalid, method returns an empty string.
287 * Otherwise std::snprintf() is being utilized.
288 *
289 * @param maxStrLen maximum resulting string length
290 * @param format `printf()` compliant format string
291 * @param args optional arguments matching the format string
292 */
293 template <typename... Args>
294 constexpr std::string format_string_v(const std::size_t maxStrLen, const std::string_view format, const Args &...args) {
295 if ( jau::cfmt::check2<Args...>(format) ) {
296 std::string str;
297 str.reserve(maxStrLen + 1); // incl. EOS
298 str.resize(maxStrLen); // excl. EOS
299
300 // -Wformat=2 -> -Wformat -Wformat-nonliteral -Wformat-security -Wformat-y2k
301 // -Wformat=2 -Wformat-overflow=2 -Wformat-signedness
304 const size_t nchars = std::snprintf(&str[0], maxStrLen + 1, format.data(), args...);
306 if( nchars < maxStrLen + 1 ) {
307 str.resize(nchars);
308 str.shrink_to_fit();
309 } // else truncated w/ nchars > MaxStrLen
310 return str;
311 } else {
312 return "";
313 }
314 }
315
316 /**
317 // *************************************************
318 // *************************************************
319 // *************************************************
320 */
321
/**
 * Returns the std::to_string() representation of the given integral or floating point value.
 * @tparam value_type an integral or floating point type
 * @param ref the value to represent
 * @return the string representation of the value
 */
template< class value_type,
          std::enable_if_t< std::is_arithmetic_v<value_type>, // integral or floating point
                            bool> = true>
inline std::string to_string(const value_type & ref)
{
    std::string res = std::to_string(ref);
    return res;
}
330
/**
 * Returns a std::string copy of the given std::string or std::string derived value.
 * @tparam value_type std::string or a type derived from it
 * @param ref the string to copy
 * @return the string copy
 */
template< class value_type,
          std::enable_if_t< std::is_base_of_v<std::string, value_type>, // implies neither integral nor floating point
                            bool> = true>
inline std::string to_string(const value_type & ref) {
    return std::string(ref);
}
339
/**
 * Returns a std::string holding a copy of the characters of the given std::string_view
 * or std::string_view derived value.
 * @tparam value_type std::string_view or a type derived from it, not derived from std::string
 * @param ref the string view to copy
 * @return the string copy
 */
template< class value_type,
          std::enable_if_t<!std::is_base_of_v<std::string, value_type> &&
                            std::is_base_of_v<std::string_view, value_type>, // implies neither integral nor floating point
                            bool> = true>
inline std::string to_string(const value_type & ref) {
    std::string res(ref);
    return res;
}
349
350 template< class value_type,
351 std::enable_if_t<!std::is_integral_v<value_type> &&
352 !std::is_floating_point_v<value_type> &&
353 !std::is_base_of_v<std::string, value_type> &&
354 !std::is_base_of_v<std::string_view, value_type> &&
355 std::is_pointer_v<value_type>,
356 bool> = true>
357 inline std::string to_string(const value_type & ref)
358 {
359 return to_hexstring((void*)ref); // NOLINT(bugprone-multi-level-implicit-pointer-conversion)
360 }
361
362 template< class value_type,
363 std::enable_if_t<!std::is_integral_v<value_type> &&
364 !std::is_floating_point_v<value_type> &&
365 !std::is_base_of_v<std::string, value_type> &&
366 !std::is_base_of_v<std::string_view, value_type> &&
367 !std::is_pointer_v<value_type> &&
369 bool> = true>
370 inline std::string to_string(const value_type & ref) {
371 return ref.toString();
372 }
373
374 template< class value_type,
375 std::enable_if_t<!std::is_integral_v<value_type> &&
376 !std::is_floating_point_v<value_type> &&
377 !std::is_base_of_v<std::string, value_type> &&
378 !std::is_base_of_v<std::string_view, value_type> &&
379 !std::is_pointer_v<value_type> &&
382 bool> = true>
383 inline std::string to_string(const value_type & ref) {
384 return ref.to_string();
385 }
386
387 template< class value_type,
388 std::enable_if_t<!std::is_integral_v<value_type> &&
389 !std::is_floating_point_v<value_type> &&
390 !std::is_base_of_v<std::string, value_type> &&
391 !std::is_base_of_v<std::string_view, value_type> &&
392 !std::is_pointer_v<value_type> &&
396 bool> = true>
397 inline std::string to_string(const value_type & ref) {
398 return to_hexstring((void*)ref.operator->());
399 }
400
401 template< class value_type,
402 std::enable_if_t<!std::is_integral_v<value_type> &&
403 !std::is_floating_point_v<value_type> &&
404 !std::is_base_of_v<std::string, value_type> &&
405 !std::is_base_of_v<std::string_view, value_type> &&
406 !std::is_pointer_v<value_type> &&
410 bool> = true>
411 inline std::string to_string(const value_type & ref) {
412 (void)ref;
413 return "jau::to_string<T> n/a for type "+type_cue<value_type>::to_string();
414 }
415
/**
 * Returns the to_string() representations of all elements of the given list,
 * joined by the given delimiter.
 * @tparam T the element type, to_string() must be applicable to it
 * @param list the elements to represent
 * @param delim the delimiter placed between two consecutive elements
 * @return the joined string, empty if the list is empty
 */
template<typename T>
std::string to_string(std::vector<T> const &list, const std::string& delim)
{
    std::string joined;
    auto it = list.cbegin();
    const auto last = list.cend();
    if ( it == last ) {
        return joined;
    }
    joined.append( to_string( *it ) ); // first element w/o leading delimiter
    for(++it; it != last; ++it) {
        joined.append( delim );
        joined.append( to_string( *it ) );
    }
    return joined;
}
/**
 * Returns the to_string() representations of all elements of the given list,
 * joined by `", "`.
 * @tparam T the element type
 * @param list the elements to represent
 * @return the joined string
 */
template<typename T>
std::string to_string(std::vector<T> const &list) {
    return to_string<T>(list, ", ");
}
435
436 bool to_integer(long long & result, const std::string& str, const char limiter='\0', const char *limiter_pos=nullptr);
437 bool to_integer(long long & result, const char * str, size_t str_len, const char limiter='\0', const char *limiter_pos=nullptr);
438
/**
 * C++20: Heterogeneous Lookup in (Un)ordered Containers
 *
 * Transparent hash functor accepting `const char*`, std::string_view and std::string.
 * All overloads hash through std::hash<std::string_view>, which the standard library
 * guarantees to match std::hash<std::string> for equal content.
 *
 * @see https://www.cppstories.com/2021/heterogeneous-access-cpp20/
 */
struct string_hash {
    using is_transparent = void;

    [[nodiscard]] size_t operator()(std::string_view txt) const {
        return std::hash<std::string_view>{}(txt);
    }
    [[nodiscard]] size_t operator()(const char* txt) const {
        return (*this)( std::string_view(txt) );
    }
    [[nodiscard]] size_t operator()(const std::string& txt) const {
        return (*this)( std::string_view(txt) );
    }
};
456
/**
 * std::unordered_map keyed by std::string with mapped type T,
 * using string_hash as hash functor and std::equal_to<> as key comparison.
 */
457 template<typename T>
458 using StringHashMap = std::unordered_map<std::string, T, string_hash, std::equal_to<>>;
459
/**
 * std::unordered_set of std::string,
 * using string_hash as hash functor and std::equal_to<> as key comparison.
 */
460 using StringHashSet = std::unordered_set<std::string, string_hash, std::equal_to<>>;
461
462 /**@}*/
463
464} // namespace jau
465
/**
 * Expands to a jau::format_string() call with the given arguments, terminated by `;`,
 * followed by a `static_assert` requiring `jau::cfmt::checkR(__VA_ARGS__).argCount()`
 * to be non-negative at compile time.
 *
 * Since the expansion consists of two statements, the macro must be used
 * where the trailing `static_assert` declaration is permitted.
 *
 * NOTE(review): a negative argCount() presumably signals a format/argument mismatch - confirm against jau::cfmt.
 */
466#define jau_format_string_static(...) \
467 jau::format_string(__VA_ARGS__); \
468 static_assert( 0 <= jau::cfmt::checkR(__VA_ARGS__).argCount() ); // compile time validation!
469
470/** \example test_intdecstring01.cpp
471 * This C++ unit test validates the jau::to_decstring implementation
472 */
473
474#endif /* JAU_STRING_UTIL_HPP_ */
constexpr bool is_little_endian() noexcept
Evaluates true if platform is running in little endian mode, i.e.
constexpr uint16_t bswap(uint16_t const source) noexcept
Definition byte_util.hpp:86
std::string to_string(const endian_t v) noexcept
Return std::string representation of the given endian.
constexpr bool is_little_endian(const endian_t byte_order) noexcept
Returns true if given byte_order equals endian::little, otherwise false.
constexpr uint16_t cpu_to_le(uint16_t const h) noexcept
constexpr bool value(const Bool rhs) noexcept
constexpr bool has_toString_v
constexpr bool has_member_of_pointer_v
constexpr bool has_to_string_v
#define PRAGMA_DISABLE_WARNING_PUSH
constexpr std::enable_if_t< sizeof(Dest)==sizeof(Source) &&std::is_pointer_v< Source > &&std::is_pointer_v< Dest >, Dest > pointer_cast(const Source &src) noexcept
A constexpr pointer cast implementation for C++17, inspired by C++20 bit_cast<>(arg).
#define PRAGMA_DISABLE_WARNING_POP
#define PRAGMA_DISABLE_WARNING_FORMAT_NONLITERAL
constexpr T invert_sign(const T x) noexcept
Safely inverts the sign of an arithmetic number w/ branching in O(1)
constexpr nsize_t digits10(const T x, const snsize_t x_sign, const bool sign_is_digit=true) noexcept
Returns the number of decimal digits of the given integral value number using std::log10<T>().
Definition int_math.hpp:403
uint_fast32_t nsize_t
Natural 'size_t' alternative using uint_fast32_t as its natural sized type.
Definition int_types.hpp:55
int_fast32_t snsize_t
Natural 'ssize_t' alternative using int_fast32_t as its natural sized type.
Definition int_types.hpp:67
constexpr int sign(const T x) noexcept
Returns the value of the sign function (w/o branching ?) in O(1).
Definition base_math.hpp:84
constexpr std::string format_string_v(const std::size_t maxStrLen, const std::string_view format, const Args &...args)
Safely returns a string according to printf() formatting rules and variable number of arguments follo...
std::string & toLowerInPlace(std::string &s) noexcept
std::string trim(const std::string &s) noexcept
trim copy
std::string vformat_string(const char *format, va_list ap)
Returns a string according to vprintf() formatting rules using va_list instead of a variable number o...
void trimInPlace(std::string &s) noexcept
trim in place
std::string to_hexstring(value_type const &v, const bool skipLeading0x=false) noexcept
Produce a lower-case hexadecimal string representation with leading 0x in MSB of the given pointer.
std::string get_string(const uint8_t *buffer, nsize_t const buffer_len, nsize_t const max_len) noexcept
Returns a C++ String taken from buffer with maximum length of min(max_len, max_len).
uint64_t from_hexstring(std::string const &s, const bool lsbFirst=!jau::is_little_endian(), const bool checkLeading0x=true) noexcept
Converts a given hexadecimal string representation into a uint64_t value according to hexStringBytes(...
std::string & byteHexString(std::string &dest, const uint8_t value, const bool lowerCase) noexcept
Produce a hexadecimal string representation of the given byte value.
std::unordered_set< std::string, string_hash, std::equal_to<> > StringHashSet
bool to_integer(long long &result, const std::string &str, const char limiter='\0', const char *limiter_pos=nullptr)
std::string bytesHexString(const void *data, const nsize_t length, const bool lsbFirst, const bool lowerCase=true, const bool skipLeading0x=false) noexcept
Produce a hexadecimal string representation of the given byte values.
constexpr bool check2(const std::string_view fmt) noexcept
Strict type validation of arguments against the format string.
std::unordered_map< std::string, T, string_hash, std::equal_to<> > StringHashMap
std::vector< std::string > split_string(const std::string &str, const std::string &separator) noexcept
Split given string str at separator into the resulting std::vector excluding the separator sequence .
std::string format_string(const char *format,...)
Returns a string according to printf() formatting rules and variable number of arguments following th...
bool is_ascii_code(int c) noexcept
std::string toLower(const std::string &s) noexcept
size_t hexStringBytes(std::vector< uint8_t > &out, const std::string &hexstr, const bool lsbFirst, const bool checkLeading0x) noexcept
Converts a given hexadecimal string representation into a byte vector.
std::string to_decstring(const value_type &v, const char separator=',', const nsize_t width=0) noexcept
Produce a decimal string representation of an integral integer value.
__pack(...): Produces MSVC, clang and gcc compatible lead-in and -out macros.
Definition backtrace.hpp:32
STL namespace.
C++20: Heterogeneous Lookup in (Un)ordered Containers.
size_t operator()(const std::string &txt) const
size_t operator()(const char *txt) const
size_t operator()(std::string_view txt) const
static std::string to_string(const bool withSize=true) noexcept