jaulib v1.3.0
Jau Support Library (C++, Java, ..)
string_util.hpp
Go to the documentation of this file.
1/*
2 * Author: Sven Gothel <sgothel@jausoft.com>
3 * Copyright (c) 2021 Gothel Software e.K.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be
14 * included in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25#ifndef JAU_STRING_UTIL_HPP_
26#define JAU_STRING_UTIL_HPP_
27
28#include <cstdint>
29#include <cstring>
30#include <string>
31#include <cstdarg>
32#include <memory>
33#include <type_traits>
34#include <vector>
35
36#include <jau/cpp_lang_util.hpp>
39
40#include <jau/int_types.hpp>
41#include <jau/int_math.hpp>
42
43namespace jau {
44
45 /** @defgroup StringUtils String Utilities
46 * String utilities for type conversion and manipulation.
47 *
48 * @{
49 */
50
51 /**
52 * Returns a C++ string taken from `buffer` with a maximum length of min(buffer_len, max_len).
53 * <p>
54 * The maximum length only delimits the string length and does not include the EOS null byte.
55 * An EOS null byte will be added.
56 * </p>
57 * <p>
58 * The source string within `buffer` is not required to contain an EOS null byte.
59 * </p>
 * @param buffer the source bytes holding the string
 * @param buffer_len length of `buffer`; NOTE(review): presumably counted in bytes, confirm against the implementation
 * @param max_len upper limit of the resulting string length, excluding the EOS null byte
 * @return the resulting std::string
60 */
61 std::string get_string(const uint8_t *buffer, nsize_t const buffer_len, nsize_t const max_len) noexcept;
62
63 /**
 * Trims the given string `s` in place.
 * NOTE(review): the set of removed characters (presumably leading and trailing whitespace)
 * is defined by the implementation and not visible in this header.
 */
64 void trimInPlace(std::string &s) noexcept;
65
66 /**
 * Returns a trimmed copy of the given string `s`, leaving `s` untouched.
 * NOTE(review): presumably applies the same rule as trimInPlace() - confirm against the implementation.
 */
67 std::string trim(const std::string &s) noexcept;
68
69 /** Splits the given string `str` at each `separator` into the resulting std::vector, excluding the separator sequence. */
70 std::vector<std::string> split_string(const std::string& str, const std::string& separator) noexcept;
71
/**
 * Lower-case conversion of the given string `s` in place, as its name states.
 * @return a std::string reference; NOTE(review): presumably `s` itself to allow chaining - confirm.
 */
72 std::string& toLowerInPlace(std::string& s) noexcept;
73
/**
 * Returns a lower-case copy of the given string `s`, as its name states.
 * NOTE(review): locale handling is not visible in this header.
 */
74 std::string toLower(const std::string& s) noexcept;
75
76 /**
77 // *************************************************
78 // *************************************************
79 // *************************************************
80 */
81
82 /**
83 * Converts a given hexadecimal string representation into a byte vector.
84 *
85 * In case an invalid hexadecimal digit appears in the given string,
86 * conversion ends and the byte vector is filled up until the violation.
87 *
88 * @param out the byte vector sink
89 * @param hexstr the hexadecimal string representation
90 * @param lsbFirst low significant byte first
91 * @param checkLeading0x if true, checks for a leading `0x` and removes it, otherwise not.
92 * @return the length of the matching byte vector
93 */
94 size_t hexStringBytes(std::vector<uint8_t>& out, const std::string& hexstr, const bool lsbFirst, const bool checkLeading0x) noexcept;
95
96 /**
 * Variant of hexStringBytes() taking the hexadecimal representation as a raw
 * character buffer `hexstr` together with its length `hexstr_len`
 * instead of a std::string. See hexStringBytes() for the remaining parameters and the return value.
 */
97 size_t hexStringBytes(std::vector<uint8_t>& out, const uint8_t hexstr[], const size_t hexstr_len, const bool lsbFirst, const bool checkLeading0x) noexcept;
98
99 /**
100 * Produce a hexadecimal string representation of the given byte values.
101 * <p>
102 * If lsbFirst is true, orders LSB left -> MSB right, usual for byte streams. Result will not have a leading `0x`.<br>
103 * Otherwise orders MSB left -> LSB right, usual for readable integer values. Result will have a leading `0x`.
104 * </p>
105 * @param data base pointer of the bytes, i.e. the pointer to the first byte to print less `offset`
106 * @param offset offset from `data` to the first byte to print
107 * @param length number of bytes to print
108 * @param lsbFirst true having the least significant byte printed first (lowest addressed byte to highest),
109 * otherwise have the most significant byte printed first (highest addressed byte to lowest).
110 * A leading `0x` will be prepended if `lsbFirst == false`.
111 * @param lowerCase true to use lower case hex-chars (default), otherwise capital letters are being used.
112 * @return the hex-string representation of the data
113 */
114 std::string bytesHexString(const void* data, const nsize_t offset, const nsize_t length,
115 const bool lsbFirst, const bool lowerCase=true) noexcept;
116
/**
 * Produce a hexadecimal string representation of all elements of the given container.
 *
 * Forwards the container's `data()` pointer and `size()` to the pointer based
 * bytesHexString() using an offset of zero.
 *
 * NOTE(review): `bytes.size()` is handed over as the byte length, which equals the
 * storage size only for single-byte element types - confirm whether wider integral
 * element types are meant to be accepted by the constraint.
 *
 * @tparam uint8_container_type container type with an integral `value_type` convertible to `uint8_t`
 * @param bytes the container whose elements shall be printed
 * @param lsbFirst see the pointer based bytesHexString()
 * @param lowerCase true to use lower case hex-chars (default), otherwise capital letters are being used.
 * @return the hex-string representation of the container's data
 */
template< typename uint8_container_type,
          std::enable_if_t<std::is_integral_v<typename uint8_container_type::value_type> &&
                           std::is_convertible_v<typename uint8_container_type::value_type, uint8_t>,
                           bool> = true>
std::string bytesHexString(const uint8_container_type& bytes,
                           const bool lsbFirst, const bool lowerCase=true) noexcept
{
    const uint8_t* const first_byte = reinterpret_cast<const uint8_t*>( bytes.data() );
    const auto element_count = bytes.size();
    return bytesHexString(first_byte, 0, element_count, lsbFirst, lowerCase);
}
125
126 /**
127 * Produce a hexadecimal string representation of the given byte value.
128 * @param dest the destination std::string to which the representation is appended
129 * @param value the byte value to represent
130 * @param lowerCase true to use lower case hex-chars, otherwise capital letters are being used.
131 * @return the given std::string reference `dest` for chaining
132 */
133 std::string& byteHexString(std::string& dest, const uint8_t value, const bool lowerCase) noexcept;
134
135 /**
136 * Produce a lower-case hexadecimal string representation of the given pointer.
137 * @tparam value_type a pointer type
138 * @param v the pointer of given pointer type
139 * @return the hex-string representation of the value
140 * @see bytesHexString()
141 */
142 template< class value_type,
143 std::enable_if_t<std::is_pointer_v<value_type>,
144 bool> = true>
145 inline std::string to_hexstring(value_type const & v) noexcept
146 {
147 const uintptr_t v2 = reinterpret_cast<uintptr_t>(v);
148 return bytesHexString(pointer_cast<const uint8_t*>(&v2), 0, sizeof(v), false /* lsbFirst */);
149 }
150
151 /**
152 * Produce a lower-case hexadecimal string representation of the given value with standard layout.
153 * @tparam value_type a standard layout value type
154 * @param v the value of given standard layout type
155 * @return the hex-string representation of the value
156 * @see bytesHexString()
157 */
158 template< class value_type,
159 std::enable_if_t<!std::is_pointer_v<value_type> &&
160 std::is_standard_layout_v<value_type>,
161 bool> = true>
162 inline std::string to_hexstring(value_type const & v) noexcept
163 {
164 return bytesHexString(pointer_cast<const uint8_t*>(&v), 0, sizeof(v), false /* lsbFirst */);
165 }
166
167 /**
168 // *************************************************
169 // *************************************************
170 // *************************************************
171 */
172
173 /**
174 * Produce a decimal string representation of an integral integer value.
175 * @tparam T an integral integer type
176 * @param v the integral integer value
177 * @param separator if not 0, use as separation character, otherwise no separation characters are being used
178 * @param width the minimum number of characters to be printed. Add padding with blank space if result is shorter.
179 * @return the string representation of the integral integer value
180 */
181 template< class value_type,
182 std::enable_if_t< std::is_integral_v<value_type>,
183 bool> = true>
184 std::string to_decstring(const value_type& v, const char separator=',', const nsize_t width=0) noexcept {
185 const snsize_t v_sign = jau::sign<value_type>(v);
186 const nsize_t digit10_count1 = jau::digits10<value_type>(v, v_sign, true /* sign_is_digit */);
187 const nsize_t digit10_count2 = v_sign < 0 ? digit10_count1 - 1 : digit10_count1; // less sign
188
189 const nsize_t comma_count = 0 == separator ? 0 : ( digit10_count1 - 1 ) / 3;
190 const nsize_t net_chars = digit10_count1 + comma_count;
191 const nsize_t total_chars = std::max<nsize_t>(width, net_chars);
192 std::string res(total_chars, ' ');
193
194 value_type n = v;
195 nsize_t char_iter = 0;
196
197 for(nsize_t digit10_iter = 0; digit10_iter < digit10_count2 /* && char_iter < total_chars */; digit10_iter++ ) {
198 const int digit = v_sign < 0 ? invert_sign( n % 10 ) : n % 10;
199 n /= 10;
200 if( 0 < digit10_iter && 0 == digit10_iter % 3 ) {
201 res[total_chars-1-(char_iter++)] = separator;
202 }
203 res[total_chars-1-(char_iter++)] = '0' + digit;
204 }
205 if( v_sign < 0 /* && char_iter < total_chars */ ) {
206 res[total_chars-1-(char_iter++)] = '-';
207 }
208 return res;
209 }
210
211 /**
212 // *************************************************
213 // *************************************************
214 // *************************************************
215 */
216
217 /**
218 * Returns a string according to `vprintf()` formatting rules
219 * using `va_list` instead of a variable number of arguments.
220 * @param format `printf()` compliant format string
221 * @param ap `va_list` arguments
 * @return the formatted std::string
222 */
223 std::string vformat_string(const char* format, va_list ap) noexcept;
224
225 /**
226 * Returns a string according to `printf()` formatting rules
227 * and variable number of arguments following the `format` argument.
228 * @param format `printf()` compliant format string
 * @return the formatted std::string
229 */
230 std::string format_string(const char* format, ...) noexcept;
231
232 /**
233 // *************************************************
234 // *************************************************
235 // *************************************************
236 */
237
/**
 * Returns the string representation of an integral or floating point value
 * as produced by `std::to_string()`.
 * @tparam value_type an integral or floating point type
 * @param ref the value to represent
 */
template< typename value_type,
          std::enable_if_t< std::is_integral_v<value_type> ||
                            std::is_floating_point_v<value_type>,
                            bool> = true>
inline std::string to_string(const value_type & ref)
{
    std::string text = std::to_string(ref);
    return text;
}
246
/**
 * Returns a copy of the given string object.
 * @tparam value_type `std::string` or a type derived from it
 * @param ref the string to copy
 */
template< typename value_type,
          std::enable_if_t<!std::is_integral_v<value_type> &&
                           !std::is_floating_point_v<value_type> &&
                            std::is_base_of_v<std::string, value_type>,
                           bool> = true>
inline std::string to_string(const value_type & ref)
{
    const std::string& as_string = ref;
    return as_string;
}
255
/**
 * Returns a std::string holding a copy of the characters of the given string view.
 * @tparam value_type `std::string_view` or a type derived from it
 * @param ref the string view to copy
 */
template< typename value_type,
          std::enable_if_t<!std::is_integral_v<value_type> &&
                           !std::is_floating_point_v<value_type> &&
                           !std::is_base_of_v<std::string, value_type> &&
                            std::is_base_of_v<std::string_view, value_type>,
                           bool> = true>
inline std::string to_string(const value_type & ref)
{
    std::string copy(ref);
    return copy;
}
265
266 template< class value_type,
267 std::enable_if_t<!std::is_integral_v<value_type> &&
268 !std::is_floating_point_v<value_type> &&
269 !std::is_base_of_v<std::string, value_type> &&
270 !std::is_base_of_v<std::string_view, value_type> &&
271 std::is_pointer_v<value_type>,
272 bool> = true>
273 inline std::string to_string(const value_type & ref)
274 {
275 return to_hexstring((void*)ref);
276 }
277
278 template< class value_type,
279 std::enable_if_t<!std::is_integral_v<value_type> &&
280 !std::is_floating_point_v<value_type> &&
281 !std::is_base_of_v<std::string, value_type> &&
282 !std::is_base_of_v<std::string_view, value_type> &&
283 !std::is_pointer_v<value_type> &&
284 jau::has_toString_v<value_type>,
285 bool> = true>
286 inline std::string to_string(const value_type & ref) {
287 return ref.toString();
288 }
289
290 template< class value_type,
291 std::enable_if_t<!std::is_integral_v<value_type> &&
292 !std::is_floating_point_v<value_type> &&
293 !std::is_base_of_v<std::string, value_type> &&
294 !std::is_base_of_v<std::string_view, value_type> &&
295 !std::is_pointer_v<value_type> &&
296 !jau::has_toString_v<value_type> &&
297 jau::has_to_string_v<value_type>,
298 bool> = true>
299 inline std::string to_string(const value_type & ref) {
300 return ref.to_string();
301 }
302
303 template< class value_type,
304 std::enable_if_t<!std::is_integral_v<value_type> &&
305 !std::is_floating_point_v<value_type> &&
306 !std::is_base_of_v<std::string, value_type> &&
307 !std::is_base_of_v<std::string_view, value_type> &&
308 !std::is_pointer_v<value_type> &&
309 !jau::has_toString_v<value_type> &&
310 !jau::has_to_string_v<value_type> &&
311 jau::has_member_of_pointer_v<value_type>,
312 bool> = true>
313 inline std::string to_string(const value_type & ref) {
314 return to_hexstring((void*)ref.operator->());
315 }
316
317 template< class value_type,
318 std::enable_if_t<!std::is_integral_v<value_type> &&
319 !std::is_floating_point_v<value_type> &&
320 !std::is_base_of_v<std::string, value_type> &&
321 !std::is_base_of_v<std::string_view, value_type> &&
322 !std::is_pointer_v<value_type> &&
323 !jau::has_toString_v<value_type> &&
324 !jau::has_to_string_v<value_type> &&
325 !jau::has_member_of_pointer_v<value_type>,
326 bool> = true>
327 inline std::string to_string(const value_type & ref) {
328 (void)ref;
329 return "jau::to_string<T> not available for "+type_cue<value_type>::print("unknown", TypeTraitGroup::ALL);
330 }
331
/**
 * Returns the string representations of all elements of the given vector,
 * joined by the given delimiter.
 *
 * Each element is converted using the single argument `to_string()` matching its type.
 * An empty vector results in an empty string; the delimiter is only placed between elements.
 *
 * @tparam T the element type
 * @param list the elements to represent
 * @param delim the delimiter placed between two consecutive elements
 */
template<typename T>
std::string to_string(std::vector<T> const &list, const std::string& delim)
{
    std::string joined;
    auto it = list.cbegin();
    const auto last = list.cend();
    if( it == last ) {
        return joined;
    }
    joined.append( to_string( *it ) );
    for( ++it; it != last; ++it ) {
        joined.append( delim );
        joined.append( to_string( *it ) );
    }
    return joined;
}
/**
 * Returns the string representations of all elements of the given vector,
 * joined by the delimiter `", "`.
 * @tparam T the element type
 * @param list the elements to represent
 */
template<typename T>
std::string to_string(std::vector<T> const &list)
{
    return to_string<T>(list, ", ");
}
351
/**
 * Converts the given string `str` to an integer stored in `result`.
 * NOTE(review): semantics are not visible in this header - presumably returns true on
 * successful conversion; the roles of `limiter` and `limiter_pos` need to be confirmed
 * against the implementation.
 * @param result storage for the converted value
 * @param str the string to convert
 * @param limiter defaults to `'\0'`
 * @param limiter_pos defaults to `nullptr`
 */
352 bool to_integer(long long & result, const std::string& str, const char limiter='\0', const char *limiter_pos=nullptr);
/**
 * Variant of to_integer() taking the string as a character pointer `str`
 * together with its length `str_len` instead of a std::string.
 * NOTE(review): same open questions as for the std::string variant.
 */
353 bool to_integer(long long & result, const char * str, size_t str_len, const char limiter='\0', const char *limiter_pos=nullptr);
354
355 /**@}*/
356
357} // namespace jau
358
359/** \example test_intdecstring01.cpp
360 * This C++ unit test validates the jau::to_decstring implementation
361 */
362
363#endif /* JAU_STRING_UTIL_HPP_ */
std::string to_string(const endian_t v) noexcept
Return std::string representation of the given endian.
constexpr T invert_sign(const T x) noexcept
Safely inverts the sign of an arithmetic number w/ branching in O(1)
Definition: base_math.hpp:126
uint_fast32_t nsize_t
Natural 'size_t' alternative using uint_fast32_t as its natural sized type.
Definition: int_types.hpp:53
int_fast32_t snsize_t
Natural 'ssize_t' alternative using int_fast32_t as its natural sized type.
Definition: int_types.hpp:65
std::string & toLowerInPlace(std::string &s) noexcept
std::string format_string(const char *format,...) noexcept
Returns a string according to printf() formatting rules and variable number of arguments following th...
std::string trim(const std::string &s) noexcept
trim copy
std::string bytesHexString(const void *data, const nsize_t offset, const nsize_t length, const bool lsbFirst, const bool lowerCase=true) noexcept
Produce a hexadecimal string representation of the given byte values.
void trimInPlace(std::string &s) noexcept
trim in place
std::string to_string(std::vector< T > const &list)
std::string get_string(const uint8_t *buffer, nsize_t const buffer_len, nsize_t const max_len) noexcept
Returns a C++ string taken from buffer with a maximum length of min(buffer_len, max_len).
std::string & byteHexString(std::string &dest, const uint8_t value, const bool lowerCase) noexcept
Produce a hexadecimal string representation of the given byte value.
std::string vformat_string(const char *format, va_list ap) noexcept
Returns a string according to vprintf() formatting rules using va_list instead of a variable number o...
bool to_integer(long long &result, const std::string &str, const char limiter='\0', const char *limiter_pos=nullptr)
std::vector< std::string > split_string(const std::string &str, const std::string &separator) noexcept
Split given string str at separator into the resulting std::vector excluding the separator sequence .
std::string toLower(const std::string &s) noexcept
size_t hexStringBytes(std::vector< uint8_t > &out, const std::string &hexstr, const bool lsbFirst, const bool checkLeading0x) noexcept
Converts a given hexadecimal string representation into a byte vector.
std::string to_hexstring(value_type const &v) noexcept
Produce a lower-case hexadecimal string representation of the given pointer.
std::string to_decstring(const value_type &v, const char separator=',', const nsize_t width=0) noexcept
Produce a decimal string representation of an integral integer value.
__pack(...): Produces MSVC, clang and gcc compatible lead-in and -out macros.
Definition: backtrace.hpp:32
STL namespace.
static void print(const std::string &typedefname, const TypeTraitGroup verbosity=TypeTraitGroup::NONE)
Print information of this type to stdout, potentially with all Type traits known.