Gamp v0.0.7-36-g24b1eb6
Gamp: Graphics, Audio, Multimedia and Processing
Loading...
Searching...
No Matches
string_util.hpp
Go to the documentation of this file.
1/*
2 * Author: Sven Gothel <sgothel@jausoft.com>
3 * Copyright (c) 2021 Gothel Software e.K.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be
14 * included in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef JAU_STRING_UTIL_HPP_
26#define JAU_STRING_UTIL_HPP_
27
28#include <cstdint>
29#include <cstring>
30#include <cstdarg>
31#include <string>
32#include <string_view>
33#include <type_traits>
34#include <unordered_map>
35#include <unordered_set>
36#include <vector>
37
38#include <jau/byte_util.hpp>
39#include <jau/cpp_lang_util.hpp>
42
43#include <jau/int_types.hpp>
44#include <jau/int_math.hpp>
45#include <jau/string_cfmt.hpp>
46
47namespace jau {
48
49 /** @defgroup StringUtils String Utilities
50 * String utilities for type conversion and manipulation.
51 *
52 * @{
53 */
54
/**
 * Returns true if the given character code is classified as a control character
 * by std::iscntrl() or as a printable character by std::isprint().
 *
 * Values which are not representable as `unsigned char` (e.g. negative values incl. `EOF`
 * or values beyond the `unsigned char` range) yield false without consulting
 * the classification functions, as passing such values to them is undefined behavior.
 *
 * @param c the character code to test
 * @return true if `c` is a control or printable character, otherwise false
 */
inline bool is_ascii_code(int c) noexcept {
    if( c != static_cast<int>( static_cast<unsigned char>(c) ) ) {
        return false; // outside the valid argument range of std::iscntrl() and std::isprint()
    }
    return 0 != std::iscntrl(c) || 0 != std::isprint(c);
}
58
59 /**
60 * Returns a C++ String taken from buffer with maximum length of min(buffer_len, max_len).
61 * <p>
62 * The maximum length only delimits the string length and does not contain the EOS null byte.
63 * An EOS null byte will be added.
64 * </p>
65 * <p>
66 * The source string within buffer is not required to contain an EOS null byte.
67 * </p>
 * NOTE(review): the implementation is not part of this header; the `min(buffer_len, max_len)`
 * bound is taken from the parameter list, please confirm against the definition.
68 */
69 std::string get_string(const uint8_t *buffer, nsize_t const buffer_len, nsize_t const max_len) noexcept;
70
71 /** Trims the given string `s` in place, i.e. modifies the passed string. */
72 void trimInPlace(std::string &s) noexcept;
73
74 /** Returns a trimmed copy of the given string `s`, the passed string is left untouched. */
75 std::string trim(const std::string &s) noexcept;
76
77 /** Split given string `str` at `separator` into the resulting std::vector, excluding the separator sequence. */
78 std::vector<std::string> split_string(const std::string& str, const std::string& separator) noexcept;
79
/**
 * In-place variant of toLower(), taking a mutable string reference and returning a string reference.
 * NOTE(review): presumably converts `s` to lower case and returns `s` for chaining - confirm against the definition.
 */
80 std::string& toLowerInPlace(std::string& s) noexcept;
81
/**
 * Copy variant of toLowerInPlace(), taking a const string reference and returning a new string.
 * NOTE(review): presumably returns a lower case copy of `s` - confirm against the definition.
 */
82 std::string toLower(const std::string& s) noexcept;
83
84 /**
85 // *************************************************
86 // *************************************************
87 // *************************************************
88 */
89
90 /**
91 * Converts a given hexadecimal string representation into a byte vector.
92 *
93 * In case an invalid hexadecimal digit appears in the given string,
94 * conversion ends and the byte vector is filled up until the violation.
95 *
96 * If string is in MSB first (default w/ leading 0x) and platform jau::is_little_endian(),
97 * lsbFirst = false shall be passed.
98 *
99 * In case hexstr contains an odd number of hex-nibbles, it will be interpreted as follows
100 * - 0xf12 = 0x0f12 = { 0x12, 0x0f } - msb, 1st single low-nibble is most significant
101 * - 12f = 0xf012 = { 0x12, 0xf0 } - lsb, last single high-nibble is most significant
102 *
103 * @param out the byte vector sink
104 * @param hexstr the hexadecimal string representation
105 * @param lsbFirst least significant byte first
106 * @param checkLeading0x if true, checks for a leading `0x` and removes it, otherwise not.
107 * @return the length of the matching byte vector
108 */
109 size_t hexStringBytes(std::vector<uint8_t>& out, const std::string& hexstr, const bool lsbFirst, const bool checkLeading0x) noexcept;
110
111 /** See hexStringBytes() above, this overload takes the hexadecimal characters as byte array `hexstr` of length `hexstr_len`. */
112 size_t hexStringBytes(std::vector<uint8_t>& out, const uint8_t hexstr[], const size_t hexstr_len, const bool lsbFirst, const bool checkLeading0x) noexcept;
113
114 /**
115 * Converts a given hexadecimal string representation into a uint64_t value according to hexStringBytes().
116 *
117 * If string is in MSB first (default w/ leading 0x) and platform jau::is_little_endian(),
118 * lsbFirst = false shall be passed (default).
 *
 * The default of `lsbFirst` is `!jau::is_little_endian()`,
 * i.e. it evaluates to false on little endian platforms and to true otherwise.
119 *
120 * @param s the hexadecimal string representation
121 * @param lsbFirst least significant byte first
122 * @param checkLeading0x if true (default), checks for a leading `0x` and removes it, otherwise not.
123 * @return the uint64_t value
124 * @see hexStringBytes()
125 * @see to_hexstring()
126 */
127 uint64_t from_hexstring(std::string const & s, const bool lsbFirst=!jau::is_little_endian(), const bool checkLeading0x=true) noexcept;
128
129 /**
130 * Produce a hexadecimal string representation of the given byte values.
131 * <p>
132 * If lsbFirst is true, orders LSB left -> MSB right, usual for byte streams. Result will not have a leading `0x`.<br>
133 * Otherwise orders MSB left -> LSB right, usual for readable integer values. Result will have a leading `0x` if !skipLeading0x (default).
134 * </p>
135 * @param data pointer to the first byte to print
136 * @param length number of bytes to print
137 * @param lsbFirst true having the least significant byte printed first (lowest addressed byte to highest),
138 * otherwise have the most significant byte printed first (highest addressed byte to lowest).
139 * A leading `0x` will be prepended if `lsbFirst == false` and `skipLeading0x == false`.
140 * @param lowerCase true to use lower case hex-chars (default), otherwise capital letters are being used.
141 * @param skipLeading0x false to add leading `0x` if !lsbFirst (default), true to not add (skip).
142 * @return the hex-string representation of the data
143 */
144 std::string bytesHexString(const void* data, const nsize_t length,
145 const bool lsbFirst, const bool lowerCase=true, const bool skipLeading0x=false) noexcept;
146
/**
 * Produce a hexadecimal string representation of the given container's elements,
 * forwarding `bytes.data()` and `bytes.size()` to the pointer based bytesHexString().
 *
 * Only enabled for containers whose `value_type` is integral and convertible to `uint8_t`.
 *
 * @tparam uint8_container_type container type providing `value_type`, `data()` and `size()`
 * @param bytes the container holding the values to print
 * @param lsbFirst passed through, see pointer based bytesHexString()
 * @param lowerCase passed through, true (default) for lower case hex-chars
 * @param skipLeading0x passed through, false (default) to add a leading `0x` if !lsbFirst
 * @return the hex-string representation of the container's data
 */
template< class uint8_container_type,
          std::enable_if_t<std::is_convertible_v<typename uint8_container_type::value_type, uint8_t> &&
                           std::is_integral_v<typename uint8_container_type::value_type>,
                           bool> = true>
std::string bytesHexString(const uint8_container_type& bytes,
                           const bool lsbFirst, const bool lowerCase=true, const bool skipLeading0x=false) noexcept {
    const uint8_t * const first_byte = reinterpret_cast<const uint8_t *>( bytes.data() );
    return bytesHexString(first_byte, bytes.size(), lsbFirst, lowerCase, skipLeading0x);
}
155
156 /**
157 * Produce a hexadecimal string representation of the given byte value.
158 * @param dest the std::string reference destination to append to
159 * @param value the byte value to represent
160 * @param lowerCase true to use lower case hex-chars, otherwise capital letters are being used.
161 * @return the given std::string reference for chaining
162 */
163 std::string& byteHexString(std::string& dest, const uint8_t value, const bool lowerCase) noexcept;
164
165 /**
166 * Produce a lower-case hexadecimal string representation with leading `0x` in MSB of the given pointer.
167 * @tparam value_type a pointer type
168 * @param v the pointer of given pointer type
169 * @param skipLeading0x false to add leading `0x` (default), true to not add (skip)..
170 * @return the hex-string representation of the value
171 * @see bytesHexString()
172 * @see from_hexstring()
173 */
174 template< class value_type,
175 std::enable_if_t<std::is_pointer_v<value_type>,
176 bool> = true>
177 inline std::string to_hexstring(value_type const & v, const bool skipLeading0x=false) noexcept
178 {
179 #if defined(__EMSCRIPTEN__) // jau::os::is_generic_wasm()
180 static_assert( is_little_endian() ); // Bug in emscripten, unable to deduce uint16_t, uint32_t or uint64_t override of cpu_to_le() or bswap()
181 const uintptr_t v_le = reinterpret_cast<uintptr_t>(v);
182 return bytesHexString(pointer_cast<const uint8_t*>(&v_le), sizeof(v), // NOLINT(bugprone-sizeof-expression): Intended
183 false /* lsbFirst */, true /* lowerCase */, skipLeading0x);
184 #else
185 const uintptr_t v_le = jau::cpu_to_le( reinterpret_cast<uintptr_t>(v) );
186 return bytesHexString(pointer_cast<const uint8_t*>(&v_le), sizeof(v), // NOLINT(bugprone-sizeof-expression): Intended
187 false /* lsbFirst */, true /* lowerCase */, skipLeading0x);
188 #endif
189 }
190
191 /**
192 * Produce a lower-case hexadecimal string representation with leading `0x` in MSB of the given value with standard layout.
193 * @tparam value_type a standard layout value type
194 * @param v the value of given standard layout type
195 * @param skipLeading0x false to add leading `0x` (default), true to not add (skip)..
196 * @return the hex-string representation of the value
197 * @see bytesHexString()
198 * @see from_hexstring()
199 */
200 template< class value_type,
201 std::enable_if_t<!std::is_pointer_v<value_type> &&
202 std::is_standard_layout_v<value_type>,
203 bool> = true>
204 inline std::string to_hexstring(value_type const & v, const bool skipLeading0x=false) noexcept
205 {
206 if constexpr( is_little_endian() ) {
207 return bytesHexString(pointer_cast<const uint8_t*>(&v), sizeof(v),
208 false /* lsbFirst */, true /* lowerCase */, skipLeading0x);
209 } else {
210 const value_type v_le = jau::bswap(v);
211 return bytesHexString(pointer_cast<const uint8_t*>(&v_le), sizeof(v),
212 false /* lsbFirst */, true /* lowerCase */, skipLeading0x);
213 }
214 }
215
216 /**
217 // *************************************************
218 // *************************************************
219 // *************************************************
220 */
221
222 /**
223 * Produce a decimal string representation of an integral integer value.
224 * @tparam T an integral integer type
225 * @param v the integral integer value
226 * @param separator if not 0, use as separation character, otherwise no separation characters are being used
227 * @param width the minimum number of characters to be printed. Add padding with blank space if result is shorter.
228 * @return the string representation of the integral integer value
229 */
230 template< class value_type,
231 std::enable_if_t< std::is_integral_v<value_type>,
232 bool> = true>
233 std::string to_decstring(const value_type& v, const char separator=',', const nsize_t width=0) noexcept {
234 const snsize_t v_sign = jau::sign<value_type>(v);
235 const nsize_t digit10_count1 = jau::digits10<value_type>(v, v_sign, true /* sign_is_digit */);
236 const nsize_t digit10_count2 = v_sign < 0 ? digit10_count1 - 1 : digit10_count1; // less sign
237
238 const nsize_t comma_count = 0 == separator ? 0 : ( digit10_count1 - 1 ) / 3;
239 const nsize_t net_chars = digit10_count1 + comma_count;
240 const nsize_t total_chars = std::max<nsize_t>(width, net_chars);
241 std::string res(total_chars, ' ');
242
243 value_type n = v;
244 nsize_t char_iter = 0;
245
246 for(nsize_t digit10_iter = 0; digit10_iter < digit10_count2 /* && char_iter < total_chars */; digit10_iter++ ) {
247 const int digit = v_sign < 0 ? invert_sign( n % 10 ) : n % 10;
248 n /= 10;
249 if( 0 < digit10_iter && 0 == digit10_iter % 3 ) {
250 res[total_chars-1-(char_iter++)] = separator;
251 }
252 res[total_chars-1-(char_iter++)] = '0' + digit;
253 }
254 if( v_sign < 0 /* && char_iter < total_chars */ ) {
255 res[total_chars-1-(char_iter++)] = '-';
256 }
257 return res;
258 }
259
260 /**
261 // *************************************************
262 // *************************************************
263 // *************************************************
264 */
265
266 /**
267 * Returns a string according to `vprintf()` formatting rules
268 * using `va_list` instead of a variable number of arguments.
269 * @param format `printf()` compliant format string
270 * @param ap `va_list` arguments
 * @return the formatted string
271 */
272 std::string vformat_string(const char* format, va_list ap);
273
274 /**
275 * Returns a string according to `printf()` formatting rules
276 * and variable number of arguments following the `format` argument.
277 * @param format `printf()` compliant format string
 * @return the formatted string
278 */
279 std::string format_string(const char* format, ...);
280
281 /**
282 * Safely returns a string according to `printf()` formatting rules
283 * and variable number of arguments following the `format` argument.
284 *
285 * jau::cfmt2::check() is utilize to pre-validate the given arguments
286 * against the format string. If invalid, method returns an empty string.
287 * Otherwise std::snprintf() is being utilized.
288 *
289 * @param maxStrLen maximum resulting string length
290 * @param format `printf()` compliant format string
291 * @param args optional arguments matching the format string
292 */
293 template <typename... Args>
294 constexpr std::string format_string_v(const std::size_t maxStrLen, const std::string_view format, const Args &...args) {
295 if ( jau::cfmt::check2<Args...>(format) ) {
296 std::string str;
297 str.reserve(maxStrLen + 1); // incl. EOS
298 str.resize(maxStrLen); // excl. EOS
299
300 // -Wformat=2 -> -Wformat -Wformat-nonliteral -Wformat-security -Wformat-y2k
301 // -Wformat=2 -Wformat-overflow=2 -Wformat-signedness
304 const size_t nchars = std::snprintf(&str[0], maxStrLen + 1, format.data(), args...);
306 if( nchars < maxStrLen + 1 ) {
307 str.resize(nchars);
308 str.shrink_to_fit();
309 } // else truncated w/ nchars > MaxStrLen
310 return str;
311 } else {
312 return "";
313 }
314 }
315
316 /**
317 // *************************************************
318 // *************************************************
319 // *************************************************
320 */
321
/**
 * Returns the std::to_string() representation of the given value.
 * Enabled for floating point types and all integral types except `bool`.
 */
template< class value_type,
          std::enable_if_t< std::is_floating_point_v<value_type> ||
                            ( !std::is_same_v<bool, value_type> && std::is_integral_v<value_type> ),
                            bool> = true>
inline std::string to_string(const value_type & ref)
{
    std::string res = std::to_string(ref);
    return res;
}
330
/** Returns `"T"` for a true and `"F"` for a false `bool` value. */
template< class value_type,
          std::enable_if_t< std::is_same_v<bool, value_type>,
                            bool> = true>
inline std::string to_string(const value_type & ref)
{
    if( ref ) {
        return std::string("T");
    }
    return std::string("F");
}
338
/** Returns a std::string copy of the given std::string, or of the std::string base of a derived type. */
template< class value_type,
          std::enable_if_t< std::is_base_of_v<std::string, value_type> &&
                            !( std::is_integral_v<value_type> ||
                               std::is_floating_point_v<value_type> ),
                            bool> = true>
inline std::string to_string(const value_type & ref) {
    const std::string& base = ref;
    return base;
}
347
/** Returns a std::string holding a copy of the characters viewed by the given std::string_view (or derived type). */
template< class value_type,
          std::enable_if_t< std::is_base_of_v<std::string_view, value_type> &&
                            !( std::is_integral_v<value_type> ||
                               std::is_floating_point_v<value_type> ||
                               std::is_base_of_v<std::string, value_type> ),
                            bool> = true>
inline std::string to_string(const value_type & ref) {
    std::string copy(ref);
    return copy;
}
357
358 template< class value_type,
359 std::enable_if_t<!std::is_integral_v<value_type> &&
360 !std::is_floating_point_v<value_type> &&
361 !std::is_base_of_v<std::string, value_type> &&
362 !std::is_base_of_v<std::string_view, value_type> &&
363 std::is_pointer_v<value_type>,
364 bool> = true>
365 inline std::string to_string(const value_type & ref)
366 {
367 return to_hexstring((void*)ref); // NOLINT(bugprone-multi-level-implicit-pointer-conversion)
368 }
369
370 template< class value_type,
371 std::enable_if_t<!std::is_integral_v<value_type> &&
372 !std::is_floating_point_v<value_type> &&
373 !std::is_base_of_v<std::string, value_type> &&
374 !std::is_base_of_v<std::string_view, value_type> &&
375 !std::is_pointer_v<value_type> &&
377 bool> = true>
378 inline std::string to_string(const value_type & ref) {
379 return ref.toString();
380 }
381
382 template< class value_type,
383 std::enable_if_t<!std::is_integral_v<value_type> &&
384 !std::is_floating_point_v<value_type> &&
385 !std::is_base_of_v<std::string, value_type> &&
386 !std::is_base_of_v<std::string_view, value_type> &&
387 !std::is_pointer_v<value_type> &&
390 bool> = true>
391 inline std::string to_string(const value_type & ref) {
392 return ref.to_string();
393 }
394
395 template< class value_type,
396 std::enable_if_t<!std::is_integral_v<value_type> &&
397 !std::is_floating_point_v<value_type> &&
398 !std::is_base_of_v<std::string, value_type> &&
399 !std::is_base_of_v<std::string_view, value_type> &&
400 !std::is_pointer_v<value_type> &&
404 bool> = true>
405 inline std::string to_string(const value_type & ref) {
406 return to_hexstring((void*)ref.operator->());
407 }
408
409 template< class value_type,
410 std::enable_if_t<!std::is_integral_v<value_type> &&
411 !std::is_floating_point_v<value_type> &&
412 !std::is_base_of_v<std::string, value_type> &&
413 !std::is_base_of_v<std::string_view, value_type> &&
414 !std::is_pointer_v<value_type> &&
418 bool> = true>
419 inline std::string to_string(const value_type & ref) {
420 (void)ref;
421 return "jau::to_string<T> n/a for type "+type_cue<value_type>::to_string();
422 }
423
/**
 * Returns the to_string() representations of all elements of the given vector,
 * joined by the given delimiter. An empty vector yields an empty string.
 *
 * @tparam T the vector's element type, to_string() must be applicable to it
 * @param list the vector of elements
 * @param delim the delimiter placed between two consecutive elements
 * @return the joined string
 */
template<typename T>
std::string to_string(std::vector<T> const &list, const std::string& delim)
{
    std::string joined;
    auto it = list.cbegin();
    const auto last = list.cend();
    if( it == last ) {
        return joined;
    }
    // first element w/o leading delimiter, all others prefixed with it
    joined.append( to_string( *it ) );
    for( ++it; it != last; ++it ) {
        joined.append( delim );
        joined.append( to_string( *it ) );
    }
    return joined;
}

/** Same as to_string(list, delim) using `", "` as the delimiter. */
template<typename T>
std::string to_string(std::vector<T> const &list) {
    const std::string default_delim(", ");
    return to_string<T>(list, default_delim);
}
443
/**
 * Converts the given string `str` to an integer stored in `result`.
 *
 * NOTE(review): the implementation is not part of this header. Presumably returns true on
 * successful conversion and false otherwise, with `limiter` and `limiter_pos` describing
 * an accepted character terminating the number - confirm against the definition.
 */
444 bool to_integer(long long & result, const std::string& str, const char limiter='\0', const char *limiter_pos=nullptr);
/** See to_integer() above, this overload takes the characters as `str` with explicit length `str_len`. */
445 bool to_integer(long long & result, const char * str, size_t str_len, const char limiter='\0', const char *limiter_pos=nullptr);
446
/**
 * Transparent string hash functor accepting `const char*`, std::string_view and std::string,
 * enabling C++20 heterogeneous lookup in (un)ordered containers.
 *
 * @see https://www.cppstories.com/2021/heterogeneous-access-cpp20/
 */
struct string_hash {
    /** Marks this functor as transparent. */
    using is_transparent = void;

    [[nodiscard]] size_t operator()(std::string_view txt) const {
        const std::hash<std::string_view> hasher{};
        return hasher(txt);
    }
    [[nodiscard]] size_t operator()(const char* txt) const {
        // hash the C string's characters via its std::string_view
        return (*this)( std::string_view(txt) );
    }
    [[nodiscard]] size_t operator()(const std::string& txt) const {
        const std::hash<std::string> hasher{};
        return hasher(txt);
    }
};
464
/** std::unordered_map with std::string keys and mapped type `T`, using string_hash and the transparent std::equal_to<> comparator. */
465 template<typename T>
466 using StringHashMap = std::unordered_map<std::string, T, string_hash, std::equal_to<>>;
467
/** std::unordered_set of std::string, using string_hash and the transparent std::equal_to<> comparator. */
468 using StringHashSet = std::unordered_set<std::string, string_hash, std::equal_to<>>;
469
470 /**@}*/
471
472} // namespace jau
473
/**
 * Expands to a jau::format_string() call with the given arguments,
 * followed by a static_assert requiring `jau::cfmt::checkR(...).argCount()` to be non-negative.
 *
 * The expansion consists of two statements and already ends with a semicolon.
 * Hence the macro can only be used where a statement sequence is valid,
 * e.g. as the initializer of a declaration statement, but not as a sub-expression
 * or as the sole statement of an unbraced `if`.
 */
474#define jau_format_string_static(...) \
475 jau::format_string(__VA_ARGS__); \
476 static_assert( 0 <= jau::cfmt::checkR(__VA_ARGS__).argCount() ); // compile time validation!
477
478/** \example test_intdecstring01.cpp
479 * This C++ unit test validates the jau::to_decstring implementation
480 */
481
482#endif /* JAU_STRING_UTIL_HPP_ */
constexpr bool is_little_endian() noexcept
Evaluates true if platform is running in little endian mode, i.e.
constexpr uint16_t bswap(uint16_t const source) noexcept
Definition byte_util.hpp:86
std::string to_string(const endian_t v) noexcept
Return std::string representation of the given endian.
constexpr bool is_little_endian(const endian_t byte_order) noexcept
Returns true if given byte_order equals endian::little, otherwise false.
constexpr uint16_t cpu_to_le(uint16_t const h) noexcept
constexpr bool value(const Bool rhs) noexcept
constexpr bool has_toString_v
constexpr bool has_member_of_pointer_v
constexpr bool has_to_string_v
#define PRAGMA_DISABLE_WARNING_PUSH
constexpr std::enable_if_t< sizeof(Dest)==sizeof(Source) &&std::is_pointer_v< Source > &&std::is_pointer_v< Dest >, Dest > pointer_cast(const Source &src) noexcept
A constexpr pointer cast implementation for C++17, inspired by C++20 bit_cast<>(arg).
#define PRAGMA_DISABLE_WARNING_POP
#define PRAGMA_DISABLE_WARNING_FORMAT_NONLITERAL
constexpr T invert_sign(const T x) noexcept
Safely inverts the sign of an arithmetic number w/ branching in O(1)
constexpr nsize_t digits10(const T x, const snsize_t x_sign, const bool sign_is_digit=true) noexcept
Returns the number of decimal digits of the given integral value number using std::log10<T>().
Definition int_math.hpp:403
uint_fast32_t nsize_t
Natural 'size_t' alternative using uint_fast32_t as its natural sized type.
Definition int_types.hpp:55
int_fast32_t snsize_t
Natural 'ssize_t' alternative using int_fast32_t as its natural sized type.
Definition int_types.hpp:67
constexpr int sign(const T x) noexcept
Returns the value of the sign function (w/o branching ?) in O(1).
Definition base_math.hpp:84
constexpr std::string format_string_v(const std::size_t maxStrLen, const std::string_view format, const Args &...args)
Safely returns a string according to printf() formatting rules and variable number of arguments follo...
std::string & toLowerInPlace(std::string &s) noexcept
std::string trim(const std::string &s) noexcept
trim copy
std::string vformat_string(const char *format, va_list ap)
Returns a string according to vprintf() formatting rules using va_list instead of a variable number o...
void trimInPlace(std::string &s) noexcept
trim in place
std::string to_hexstring(value_type const &v, const bool skipLeading0x=false) noexcept
Produce a lower-case hexadecimal string representation with leading 0x in MSB of the given pointer.
std::string get_string(const uint8_t *buffer, nsize_t const buffer_len, nsize_t const max_len) noexcept
Returns a C++ String taken from buffer with maximum length of min(buffer_len, max_len).
uint64_t from_hexstring(std::string const &s, const bool lsbFirst=!jau::is_little_endian(), const bool checkLeading0x=true) noexcept
Converts a given hexadecimal string representation into a uint64_t value according to hexStringBytes(...
std::string & byteHexString(std::string &dest, const uint8_t value, const bool lowerCase) noexcept
Produce a hexadecimal string representation of the given byte value.
std::unordered_set< std::string, string_hash, std::equal_to<> > StringHashSet
bool to_integer(long long &result, const std::string &str, const char limiter='\0', const char *limiter_pos=nullptr)
std::string bytesHexString(const void *data, const nsize_t length, const bool lsbFirst, const bool lowerCase=true, const bool skipLeading0x=false) noexcept
Produce a hexadecimal string representation of the given byte values.
constexpr bool check2(const std::string_view fmt) noexcept
Strict type validation of arguments against the format string.
std::unordered_map< std::string, T, string_hash, std::equal_to<> > StringHashMap
std::vector< std::string > split_string(const std::string &str, const std::string &separator) noexcept
Split given string str at separator into the resulting std::vector excluding the separator sequence .
std::string format_string(const char *format,...)
Returns a string according to printf() formatting rules and variable number of arguments following th...
bool is_ascii_code(int c) noexcept
std::string toLower(const std::string &s) noexcept
size_t hexStringBytes(std::vector< uint8_t > &out, const std::string &hexstr, const bool lsbFirst, const bool checkLeading0x) noexcept
Converts a given hexadecimal string representation into a byte vector.
std::string to_decstring(const value_type &v, const char separator=',', const nsize_t width=0) noexcept
Produce a decimal string representation of an integral integer value.
__pack(...): Produces MSVC, clang and gcc compatible lead-in and -out macros.
Definition backtrace.hpp:32
STL namespace.
C++20: Heterogeneous Lookup in (Un)ordered Containers.
size_t operator()(const std::string &txt) const
size_t operator()(const char *txt) const
size_t operator()(std::string_view txt) const
static std::string to_string(const bool withSize=true) noexcept