diff --git a/README.md b/README.md index 8dc7eaf..7102259 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ ![PyPI - License](https://img.shields.io/pypi/l/pysimdjson.svg?style=flat-square) -![Tests](https://github.com/TkTech/pysimdjson/workflows/Run%20tests/badge.svg) +[![tests](https://github.com/TkTech/pysimdjson/actions/workflows/release.yml/badge.svg)](https://github.com/TkTech/pysimdjson/actions/workflows/release.yml) # pysimdjson @@ -10,7 +10,8 @@ pysimdjson safe to use anywhere. Bindings are currently tested on OS X, Linux, and Windows for Python version 3.9 to 3.12. -## 📝 Documentation + +## 📈 Documentation The latest documentation can be found at https://pysimdjson.tkte.ch. @@ -24,3 +25,33 @@ found in its sister project [json_benchmark][]. [simdjson]: https://github.com/lemire/simdjson [json_benchmark]: https://github.com/tktech/json_benchmark + +## 📈 Building Locally + +To build pysimdjson from source, you'll need Python 3.9+ and a C++ compiler. + +1. **Install uv** (if not already installed): + ```bash + pip install uv + ``` + +2. **Clone the repository**: + ```bash + git clone https://github.com/TkTech/pysimdjson.git + cd pysimdjson + ``` + +3. **Build the package**: + ```bash + uv sync + ``` + +4. **(Optional) Run tests**: + ```bash + uv run pytest + ``` + +5. **(Optional) Build documentation**: + ```bash + cd docs && uv run make html + ``` diff --git a/pyproject.toml b/pyproject.toml index 1d7fc22..53f0262 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,8 +5,9 @@ build-backend = "setuptools.build_meta" [project] name = "pysimdjson" version = "7.0.2" -description = "Add your description here" +description = "Python bindings for the simdjson project, a SIMD-accelerated JSON parser" readme = "README.md" +license = "MIT" requires-python = ">=3.9" dependencies = [ ] diff --git a/simdjson/simdjson.cpp b/simdjson/simdjson.cpp index d0f441b..bcd6da6 100644 --- a/simdjson/simdjson.cpp +++ b/simdjson/simdjson.cpp @@ -1,4 +1,4 @@ -/* auto-generated on 2025-03-27 15:01:10 -0400. Do not edit! */ +/* auto-generated on 2025-12-17 20:32:36 -0500. version 4.2.4 Do not edit! */ /* including simdjson.cpp: */ /* begin file simdjson.cpp */ #define SIMDJSON_SRC_SIMDJSON_CPP @@ -40,6 +40,11 @@ #endif #endif +// C++ 26 +#if !defined(SIMDJSON_CPLUSPLUS26) && (SIMDJSON_CPLUSPLUS >= 202402L) // update when the standard is finalized +#define SIMDJSON_CPLUSPLUS26 1 +#endif + // C++ 23 #if !defined(SIMDJSON_CPLUSPLUS23) && (SIMDJSON_CPLUSPLUS >= 202302L) #define SIMDJSON_CPLUSPLUS23 1 @@ -77,30 +82,71 @@ #endif #endif +#ifndef SIMDJSON_CONSTEXPR_LAMBDA +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_CONSTEXPR_LAMBDA constexpr +#else +#define SIMDJSON_CONSTEXPR_LAMBDA +#endif +#endif + + + #ifdef __has_include #if __has_include() #include #endif #endif +// The current specification is unclear on how we detect +// static reflection, both __cpp_lib_reflection and +// __cpp_impl_reflection are proposed in the draft specification. +// For now, we disable static reflect by default. It must be +// specified at compiler time. +#ifndef SIMDJSON_STATIC_REFLECTION +#define SIMDJSON_STATIC_REFLECTION 0 // disabled by default. +#endif + #if defined(__apple_build_version__) #if __apple_build_version__ < 14000000 #define SIMDJSON_CONCEPT_DISABLED 1 // apple-clang/13 doesn't support std::convertible_to #endif #endif +#if defined(__cpp_lib_ranges) && __cpp_lib_ranges >= 201911L +#include +#define SIMDJSON_SUPPORTS_RANGES 1 +#else +#define SIMDJSON_SUPPORTS_RANGES 0 +#endif #if defined(__cpp_concepts) && !defined(SIMDJSON_CONCEPT_DISABLED) #if __cpp_concepts >= 201907L #include -#define SIMDJSON_SUPPORTS_DESERIALIZATION 1 +#define SIMDJSON_SUPPORTS_CONCEPTS 1 #else -#define SIMDJSON_SUPPORTS_DESERIALIZATION 0 +#define SIMDJSON_SUPPORTS_CONCEPTS 0 #endif #else // defined(__cpp_concepts) && !defined(SIMDJSON_CONCEPT_DISABLED) -#define SIMDJSON_SUPPORTS_DESERIALIZATION 0 +#define SIMDJSON_SUPPORTS_CONCEPTS 0 #endif // defined(__cpp_concepts) && !defined(SIMDJSON_CONCEPT_DISABLED) +// copy SIMDJSON_SUPPORTS_CONCEPTS to SIMDJSON_SUPPORTS_DESERIALIZATION. +#if SIMDJSON_SUPPORTS_CONCEPTS +#define SIMDJSON_SUPPORTS_DESERIALIZATION 1 +#else +#define SIMDJSON_SUPPORTS_DESERIALIZATION 0 +#endif + + +#if !defined(SIMDJSON_CONSTEVAL) +#if defined(__cpp_consteval) && __cpp_consteval >= 201811L && defined(__cpp_lib_constexpr_string) && __cpp_lib_constexpr_string >= 201907L +#define SIMDJSON_CONSTEVAL 1 +#else +#define SIMDJSON_CONSTEVAL 0 +#endif // defined(__cpp_consteval) && __cpp_consteval >= 201811L && defined(__cpp_lib_constexpr_string) && __cpp_lib_constexpr_string >= 201907L +#endif // !defined(SIMDJSON_CONSTEVAL) + #endif // SIMDJSON_COMPILER_CHECK_H /* end file simdjson/compiler_check.h */ /* including simdjson/portability.h: #include "simdjson/portability.h" */ @@ -152,6 +198,22 @@ using std::size_t; #define SIMDJSON_IS_ARM64 1 #elif defined(__riscv) && __riscv_xlen == 64 #define SIMDJSON_IS_RISCV64 1 + #if __riscv_v_intrinsic >= 11000 + #define SIMDJSON_HAS_RVV_INTRINSICS 1 + #endif + + #define SIMDJSON_HAS_ZVBB_INTRINSICS \ + 0 // there is currently no way to detect this + + #if SIMDJSON_HAS_RVV_INTRINSICS && __riscv_vector && \ + __riscv_v_min_vlen >= 128 && __riscv_v_elen >= 64 + // RISC-V V extension + #define SIMDJSON_IS_RVV 1 + #if SIMDJSON_HAS_ZVBB_INTRINSICS && __riscv_zvbb >= 1000000 + // RISC-V Vector Basic Bit-manipulation + #define SIMDJSON_IS_ZVBB 1 + #endif + #endif #elif defined(__loongarch_lp64) #define SIMDJSON_IS_LOONGARCH64 1 #elif defined(__PPC64__) || defined(_M_PPC64) @@ -298,6 +360,7 @@ using std::size_t; #if defined(NDEBUG) || defined(__OPTIMIZE__) || (defined(_MSC_VER) && !defined(_DEBUG)) // If NDEBUG is set, or __OPTIMIZE__ is set, or we are under MSVC in release mode, // then do away with asserts and use __assume. +// We still recommend that our users set NDEBUG in release mode. #if SIMDJSON_VISUAL_STUDIO #define SIMDJSON_UNREACHABLE() __assume(0) #define SIMDJSON_ASSUME(COND) __assume(COND) @@ -373,7 +436,7 @@ double from_chars(const char *first, const char* end) noexcept; } #ifndef SIMDJSON_EXCEPTIONS -#if __cpp_exceptions +#if defined(__cpp_exceptions) || defined(_CPPUNWIND) #define SIMDJSON_EXCEPTIONS 1 #else #define SIMDJSON_EXCEPTIONS 0 @@ -549,17 +612,6 @@ double from_chars(const char *first, const char* end) noexcept; // We assume by default static linkage #define SIMDJSON_DLLIMPORTEXPORT #endif - -/** - * Workaround for the vcpkg package manager. Only vcpkg should - * ever touch the next line. The SIMDJSON_USING_LIBRARY macro is otherwise unused. - */ -#if SIMDJSON_USING_LIBRARY -#define SIMDJSON_DLLIMPORTEXPORT __declspec(dllimport) -#endif -/** - * End of workaround for the vcpkg package manager. - */ #else #define SIMDJSON_DLLIMPORTEXPORT #endif @@ -576,12 +628,14 @@ double from_chars(const char *first, const char* end) noexcept; // even if we do not have C++17 support. #ifdef __cpp_lib_string_view #define SIMDJSON_HAS_STRING_VIEW +#include #endif // Some systems have string_view even if we do not have C++17 support, // and even if __cpp_lib_string_view is undefined, it is the case // with Apple clang version 11. // We must handle it. *This is important.* +#ifndef _MSC_VER #ifndef SIMDJSON_HAS_STRING_VIEW #if defined __has_include // do not combine the next #if with the previous one (unsafe) @@ -597,6 +651,7 @@ double from_chars(const char *first, const char* end) noexcept; #endif // __has_include () #endif // defined __has_include #endif // def SIMDJSON_HAS_STRING_VIEW +#endif // def _MSC_VER // end of complicated but important routine to try to detect string_view. // @@ -1019,9 +1074,9 @@ using std::operator<<; #endif #if nssv_HAVE_NODISCARD -# define nssv_nodiscard [[nodiscard]] +# define nssv_nodiscard simdjson_warn_unused #else -# define nssv_nodiscard /*[[nodiscard]]*/ +# define nssv_nodiscard /*simdjson_warn_unused*/ #endif // Additional includes: @@ -2339,16 +2394,25 @@ namespace std { // It could also wrongly set SIMDJSON_DEVELOPMENT_CHECKS (e.g., if the programmer // sets _DEBUG in a release build under Visual Studio, or if some compiler fails to // set the __OPTIMIZE__ macro). +// We make it so that if NDEBUG is defined, then SIMDJSON_DEVELOPMENT_CHECKS +// is not defined, irrespective of the compiler. +// We recommend that users set NDEBUG in release builds, so that +// SIMDJSON_DEVELOPMENT_CHECKS is not defined in release builds by default, +// irrespective of the compiler. #ifndef SIMDJSON_DEVELOPMENT_CHECKS #ifdef _MSC_VER // Visual Studio seems to set _DEBUG for debug builds. -#ifdef _DEBUG +// We set SIMDJSON_DEVELOPMENT_CHECKS to 1 if _DEBUG is defined +// and NDEBUG is not defined. +#if defined(_DEBUG) && !defined(NDEBUG) #define SIMDJSON_DEVELOPMENT_CHECKS 1 #endif // _DEBUG #else // _MSC_VER // All other compilers appear to set __OPTIMIZE__ to a positive integer // when the compiler is optimizing. -#ifndef __OPTIMIZE__ +// We only set SIMDJSON_DEVELOPMENT_CHECKS if both __OPTIMIZE__ +// and NDEBUG are not defined. +#if !defined(__OPTIMIZE__) && !defined(NDEBUG) #define SIMDJSON_DEVELOPMENT_CHECKS 1 #endif // __OPTIMIZE__ #endif // _MSC_VER @@ -2404,6 +2468,18 @@ namespace std { #define SIMDJSON_AVX512_ALLOWED 1 #endif + +#ifndef __has_cpp_attribute +#define simdjson_lifetime_bound +#elif __has_cpp_attribute(msvc::lifetimebound) +#define simdjson_lifetime_bound [[msvc::lifetimebound]] +#elif __has_cpp_attribute(clang::lifetimebound) +#define simdjson_lifetime_bound [[clang::lifetimebound]] +#elif __has_cpp_attribute(lifetimebound) +#define simdjson_lifetime_bound [[lifetimebound]] +#else +#define simdjson_lifetime_bound +#endif #endif // SIMDJSON_COMMON_DEFS_H /* end file simdjson/common_defs.h */ /* skipped duplicate #include "simdjson/compiler_check.h" */ @@ -2460,7 +2536,8 @@ enum error_code { SCALAR_DOCUMENT_AS_VALUE, ///< A scalar document is treated as a value. OUT_OF_BOUNDS, ///< Attempted to access location outside of document. TRAILING_CONTENT, ///< Unexpected trailing content in the JSON input - NUM_ERROR_CODES + OUT_OF_CAPACITY, ///< The capacity was exceeded, we cannot allocate enough memory. + NUM_ERROR_CODES ///< Placeholder for end of error code list. }; /** @@ -2518,6 +2595,10 @@ namespace internal { /** * The result of a simdjson operation that could fail. * + * IMPORTANT: For the ondemand API, we use implementation_simdjson_result_base as a base class + * to avoid some compilation issue. Thus, if you modify this class, please ensure that the ondemand + * implementation_simdjson_result_base is also modified. + * * Gives the option of reading error codes, or throwing an exception by casting to the desired result. * * This is a base class for implementations that want to add functions to the result type for @@ -2578,8 +2659,27 @@ struct simdjson_result_base : protected std::pair { */ simdjson_inline error_code error() const noexcept; + /** + * Whether there is a value. + */ + simdjson_inline bool has_value() const noexcept; #if SIMDJSON_EXCEPTIONS + /** + * Dereference operator to access the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + /** * Get the result value. * @@ -2607,20 +2707,53 @@ struct simdjson_result_base : protected std::pair { * @throw simdjson_error if there was an error. */ simdjson_inline operator T&&() && noexcept(false); + #endif // SIMDJSON_EXCEPTIONS /** * Get the result value. This function is safe if and only * the error() method returns a value that evaluates to false. + * We discourage the use of value_unsafe(). + * + * The recommended pattern is: + * + * T value; // where T is the type + * auto error = result.get(value); + * if (error) { + * // handle error + * } + * + * Or you may call 'value()' which will raise an exception + * in case of error: + * + * T value = result.value(); */ simdjson_inline const T& value_unsafe() const& noexcept; /** * Take the result value (move it). This function is safe if and only * the error() method returns a value that evaluates to false. + * We discourage the use of value_unsafe(). + * + * The recommended pattern is: + * + * T value; // where T is the type + * auto error = result.get(value); + * if (error) { + * // handle error, return, exit, abort + * } else { + * // use value here. + * } + * + * Or you may call 'value()' which will raise an exception + * in case of error: + * + * T value = result.value(); */ simdjson_inline T&& value_unsafe() && noexcept; + using value_type = T; + using error_type = error_code; }; // struct simdjson_result_base } // namespace internal @@ -2632,6 +2765,7 @@ struct simdjson_result_base : protected std::pair { */ template struct simdjson_result : public internal::simdjson_result_base { + /** * @private Create a new empty result with error = UNINITIALIZED. */ @@ -2664,13 +2798,32 @@ struct simdjson_result : public internal::simdjson_result_base { */ simdjson_warn_unused simdjson_inline error_code get(T &value) && noexcept; + /** + * Copy the value to a provided std::string, only enabled for std::string_view. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + template + simdjson_warn_unused simdjson_inline error_code get(std::string &value) && noexcept { + static_assert(std::is_same::value, "SFINAE"); + std::string_view v; + error_code error = std::forward>(*this).get(v); + if (!error) { + value.assign(v.data(), v.size()); + } + return error; + } + /** * The error. */ simdjson_inline error_code error() const noexcept; -#if SIMDJSON_EXCEPTIONS + +#if SIMDJSON_EXCEPTIONS + using internal::simdjson_result_base::operator*; + using internal::simdjson_result_base::operator->; /** * Get the result value. * @@ -2712,6 +2865,8 @@ struct simdjson_result : public internal::simdjson_result_base { */ simdjson_inline T&& value_unsafe() && noexcept; + using value_type = T; + using error_type = error_code; }; // struct simdjson_result #if SIMDJSON_EXCEPTIONS @@ -2741,7 +2896,7 @@ inline const std::string error_message(int error) noexcept; /* begin file simdjson/concepts.h */ #ifndef SIMDJSON_CONCEPTS_H #define SIMDJSON_CONCEPTS_H -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS #include #include @@ -2773,7 +2928,9 @@ SIMDJSON_IMPL_CONCEPT(op_append, operator+=) #undef SIMDJSON_IMPL_CONCEPT } // namespace details - +template +concept is_pair = requires { typename T::first_type; typename T::second_type; } && + std::same_as>; template concept string_view_like = std::is_convertible_v && !std::is_convertible_v; @@ -2856,21 +3013,166 @@ concept optional_type = requires(std::remove_cvref_t obj) { { obj.value() } -> std::same_as::value_type&>; requires requires(typename std::remove_cvref_t::value_type &&val) { obj.emplace(std::move(val)); - obj = std::move(val); { obj.value_or(val) } -> std::convertible_to::value_type>; }; { static_cast(obj) } -> std::same_as; // convertible to bool + { obj.reset() } noexcept -> std::same_as; +}; + + + +// Types we serialize as JSON strings (not as containers) +template +concept string_like = + std::is_same_v, std::string> || + std::is_same_v, std::string_view> || + std::is_same_v, const char*> || + std::is_same_v, char*>; + +// Concept that checks if a type is a container but not a string (because +// strings handling must be handled differently) +// Now uses iterator-based approach for broader container support +template +concept container_but_not_string = + std::ranges::input_range && !string_like && !concepts::string_view_keyed_map; + + + +// Concept: Indexable container that is not a string or associative container +// Accepts: std::vector, std::array, std::deque (have operator[], value_type, not string_like) +// Rejects: std::string (string_like), std::list (no operator[]), std::map (has key_type) +template +concept indexable_container = requires { + typename Container::value_type; + requires !concepts::string_like; + requires !requires { typename Container::key_type; }; // Reject maps/sets + requires requires(Container& c, std::size_t i) { + { c[i] } -> std::convertible_to; + }; }; +// Variable template to use with std::meta::substitute +template +constexpr bool indexable_container_v = indexable_container; } // namespace concepts + + +/** + * We use tag_invoke as our customization point mechanism. + */ +template +concept tag_invocable = requires(Tag tag, Args... args) { + tag_invoke(std::forward(tag), std::forward(args)...); +}; + +template +concept nothrow_tag_invocable = + tag_invocable && requires(Tag tag, Args... args) { + { + tag_invoke(std::forward(tag), std::forward(args)...) + } noexcept; + }; + } // namespace simdjson -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS #endif // SIMDJSON_CONCEPTS_H /* end file simdjson/concepts.h */ +/* including simdjson/constevalutil.h: #include "simdjson/constevalutil.h" */ +/* begin file simdjson/constevalutil.h */ +#ifndef SIMDJSON_CONSTEVALUTIL_H +#define SIMDJSON_CONSTEVALUTIL_H + +#include +#include +#include + +namespace simdjson { +namespace constevalutil { +#if SIMDJSON_CONSTEVAL + +constexpr static std::array json_quotable_character = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +constexpr static std::array control_chars = { + "\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", "\\u0006", + "\\u0007", "\\b", "\\t", "\\n", "\\u000b", "\\f", "\\r", + "\\u000e", "\\u000f", "\\u0010", "\\u0011", "\\u0012", "\\u0013", "\\u0014", + "\\u0015", "\\u0016", "\\u0017", "\\u0018", "\\u0019", "\\u001a", "\\u001b", + "\\u001c", "\\u001d", "\\u001e", "\\u001f"}; +// unoptimized, meant for compile-time execution +consteval std::string consteval_to_quoted_escaped(std::string_view input) { + std::string out = "\""; + for (char c : input) { + if (json_quotable_character[uint8_t(c)]) { + if (c == '"') { + out.append("\\\""); + } else if (c == '\\') { + out.append("\\\\"); + } else { + std::string_view v = control_chars[uint8_t(c)]; + out.append(v); + } + } else { + out.push_back(c); + } + } + out.push_back('"'); + return out; +} +#endif // SIMDJSON_CONSTEVAL + + +#if SIMDJSON_SUPPORTS_CONCEPTS +template +struct fixed_string { + constexpr fixed_string(const char (&str)[N]) { + for (std::size_t i = 0; i < N; ++i) { + data[i] = str[i]; + } + } + char data[N]; + constexpr std::string_view view() const { return {data, N - 1}; } + constexpr size_t size() const { return N ; } + constexpr operator std::string_view() const { return view(); } + constexpr char operator[](std::size_t index) const { return data[index]; } + constexpr bool operator==(const fixed_string& other) const { + if (N != other.size()) { + return false; + } + for (std::size_t i = 0; i < N; ++i) { + if (data[i] != other.data[i]) { + return false; + } + } + return true; + } +}; +template +fixed_string(const char (&)[N]) -> fixed_string; + +template +struct string_constant { + static constexpr std::string_view value = str.view(); +}; +#endif // SIMDJSON_SUPPORTS_CONCEPTS + +} // namespace constevalutil +} // namespace simdjson +#endif // SIMDJSON_CONSTEVALUTIL_H +/* end file simdjson/constevalutil.h */ /** * @brief The top level simdjson namespace, containing everything the library provides. @@ -3745,20 +4047,14 @@ void grisu2(char *buf, int &len, int &decimal_exponent, FloatType value) { */ inline char *append_exponent(char *buf, int e) { - if (e < 0) { - e = -e; - *buf++ = '-'; - } else { - *buf++ = '+'; - } + bool isNegative = e < 0; + e = isNegative ? -e : e; + *buf++ = isNegative ? '-' : '+'; auto k = static_cast(e); - if (k < 10) { + if (k < 100) { // Always print at least two digits in the exponent. // This is for compatibility with printf("%g"). - *buf++ = '0'; - *buf++ = static_cast('0' + k); - } else if (k < 100) { *buf++ = static_cast('0' + k / 10); k %= 10; *buf++ = static_cast('0' + k); @@ -4598,8 +4894,38 @@ simdjson_inline error_code simdjson_result_base::error() const noexcept { return this->second; } + +template +simdjson_inline bool simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -4624,6 +4950,7 @@ simdjson_inline simdjson_result_base::operator T&&() && noexcept(false) { #endif // SIMDJSON_EXCEPTIONS + template simdjson_inline const T& simdjson_result_base::value_unsafe() const& noexcept { return this->first; @@ -4659,7 +4986,8 @@ simdjson_inline void simdjson_result::tie(T &value, error_code &error) && noe } template -simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &value) && noexcept { +simdjson_warn_unused simdjson_inline error_code +simdjson_result::get(T &value) && noexcept { return std::forward>(*this).get(value); } @@ -4755,7 +5083,8 @@ namespace internal { { INCOMPLETE_ARRAY_OR_OBJECT, "INCOMPLETE_ARRAY_OR_OBJECT: JSON document ended early in the middle of an object or array. This is a fatal and unrecoverable error." }, { SCALAR_DOCUMENT_AS_VALUE, "SCALAR_DOCUMENT_AS_VALUE: A JSON document made of a scalar (number, Boolean, null or string) is treated as a value. Use get_bool(), get_double(), etc. on the document instead. "}, { OUT_OF_BOUNDS, "OUT_OF_BOUNDS: Attempt to access location outside of document."}, - { TRAILING_CONTENT, "TRAILING_CONTENT: Unexpected trailing content in the JSON input."} + { TRAILING_CONTENT, "TRAILING_CONTENT: Unexpected trailing content in the JSON input."}, + { OUT_OF_CAPACITY, "OUT_OF_CAPACITY: The capacity was exceeded, we cannot allocate enough memory."} }; // error_messages[] } // namespace internal @@ -5025,7 +5354,665 @@ extern SIMDJSON_DLLIMPORTEXPORT const double power_of_ten[]; // The truncated powers of five from 5^-342 all the way to 5^308 // The mantissa is truncated to 128 bits, and // never rounded up. Uses about 10KB. -extern SIMDJSON_DLLIMPORTEXPORT const uint64_t power_of_five_128[]; +// We use the template trick to allow inclusion in multiple translation units. +#if SIMDJSON_STATIC_REFLECTION +template struct powers_template { +constexpr static uint64_t power_of_five_128[651*2]= { + 0xeef453d6923bd65a,0x113faa2906a13b3f, + 0x9558b4661b6565f8,0x4ac7ca59a424c507, + 0xbaaee17fa23ebf76,0x5d79bcf00d2df649, + 0xe95a99df8ace6f53,0xf4d82c2c107973dc, + 0x91d8a02bb6c10594,0x79071b9b8a4be869, + 0xb64ec836a47146f9,0x9748e2826cdee284, + 0xe3e27a444d8d98b7,0xfd1b1b2308169b25, + 0x8e6d8c6ab0787f72,0xfe30f0f5e50e20f7, + 0xb208ef855c969f4f,0xbdbd2d335e51a935, + 0xde8b2b66b3bc4723,0xad2c788035e61382, + 0x8b16fb203055ac76,0x4c3bcb5021afcc31, + 0xaddcb9e83c6b1793,0xdf4abe242a1bbf3d, + 0xd953e8624b85dd78,0xd71d6dad34a2af0d, + 0x87d4713d6f33aa6b,0x8672648c40e5ad68, + 0xa9c98d8ccb009506,0x680efdaf511f18c2, + 0xd43bf0effdc0ba48,0x212bd1b2566def2, + 0x84a57695fe98746d,0x14bb630f7604b57, + 0xa5ced43b7e3e9188,0x419ea3bd35385e2d, + 0xcf42894a5dce35ea,0x52064cac828675b9, + 0x818995ce7aa0e1b2,0x7343efebd1940993, + 0xa1ebfb4219491a1f,0x1014ebe6c5f90bf8, + 0xca66fa129f9b60a6,0xd41a26e077774ef6, + 0xfd00b897478238d0,0x8920b098955522b4, + 0x9e20735e8cb16382,0x55b46e5f5d5535b0, + 0xc5a890362fddbc62,0xeb2189f734aa831d, + 0xf712b443bbd52b7b,0xa5e9ec7501d523e4, + 0x9a6bb0aa55653b2d,0x47b233c92125366e, + 0xc1069cd4eabe89f8,0x999ec0bb696e840a, + 0xf148440a256e2c76,0xc00670ea43ca250d, + 0x96cd2a865764dbca,0x380406926a5e5728, + 0xbc807527ed3e12bc,0xc605083704f5ecf2, + 0xeba09271e88d976b,0xf7864a44c633682e, + 0x93445b8731587ea3,0x7ab3ee6afbe0211d, + 0xb8157268fdae9e4c,0x5960ea05bad82964, + 0xe61acf033d1a45df,0x6fb92487298e33bd, + 0x8fd0c16206306bab,0xa5d3b6d479f8e056, + 0xb3c4f1ba87bc8696,0x8f48a4899877186c, + 0xe0b62e2929aba83c,0x331acdabfe94de87, + 0x8c71dcd9ba0b4925,0x9ff0c08b7f1d0b14, + 0xaf8e5410288e1b6f,0x7ecf0ae5ee44dd9, + 0xdb71e91432b1a24a,0xc9e82cd9f69d6150, + 0x892731ac9faf056e,0xbe311c083a225cd2, + 0xab70fe17c79ac6ca,0x6dbd630a48aaf406, + 0xd64d3d9db981787d,0x92cbbccdad5b108, + 0x85f0468293f0eb4e,0x25bbf56008c58ea5, + 0xa76c582338ed2621,0xaf2af2b80af6f24e, + 0xd1476e2c07286faa,0x1af5af660db4aee1, + 0x82cca4db847945ca,0x50d98d9fc890ed4d, + 0xa37fce126597973c,0xe50ff107bab528a0, + 0xcc5fc196fefd7d0c,0x1e53ed49a96272c8, + 0xff77b1fcbebcdc4f,0x25e8e89c13bb0f7a, + 0x9faacf3df73609b1,0x77b191618c54e9ac, + 0xc795830d75038c1d,0xd59df5b9ef6a2417, + 0xf97ae3d0d2446f25,0x4b0573286b44ad1d, + 0x9becce62836ac577,0x4ee367f9430aec32, + 0xc2e801fb244576d5,0x229c41f793cda73f, + 0xf3a20279ed56d48a,0x6b43527578c1110f, + 0x9845418c345644d6,0x830a13896b78aaa9, + 0xbe5691ef416bd60c,0x23cc986bc656d553, + 0xedec366b11c6cb8f,0x2cbfbe86b7ec8aa8, + 0x94b3a202eb1c3f39,0x7bf7d71432f3d6a9, + 0xb9e08a83a5e34f07,0xdaf5ccd93fb0cc53, + 0xe858ad248f5c22c9,0xd1b3400f8f9cff68, + 0x91376c36d99995be,0x23100809b9c21fa1, + 0xb58547448ffffb2d,0xabd40a0c2832a78a, + 0xe2e69915b3fff9f9,0x16c90c8f323f516c, + 0x8dd01fad907ffc3b,0xae3da7d97f6792e3, + 0xb1442798f49ffb4a,0x99cd11cfdf41779c, + 0xdd95317f31c7fa1d,0x40405643d711d583, + 0x8a7d3eef7f1cfc52,0x482835ea666b2572, + 0xad1c8eab5ee43b66,0xda3243650005eecf, + 0xd863b256369d4a40,0x90bed43e40076a82, + 0x873e4f75e2224e68,0x5a7744a6e804a291, + 0xa90de3535aaae202,0x711515d0a205cb36, + 0xd3515c2831559a83,0xd5a5b44ca873e03, + 0x8412d9991ed58091,0xe858790afe9486c2, + 0xa5178fff668ae0b6,0x626e974dbe39a872, + 0xce5d73ff402d98e3,0xfb0a3d212dc8128f, + 0x80fa687f881c7f8e,0x7ce66634bc9d0b99, + 0xa139029f6a239f72,0x1c1fffc1ebc44e80, + 0xc987434744ac874e,0xa327ffb266b56220, + 0xfbe9141915d7a922,0x4bf1ff9f0062baa8, + 0x9d71ac8fada6c9b5,0x6f773fc3603db4a9, + 0xc4ce17b399107c22,0xcb550fb4384d21d3, + 0xf6019da07f549b2b,0x7e2a53a146606a48, + 0x99c102844f94e0fb,0x2eda7444cbfc426d, + 0xc0314325637a1939,0xfa911155fefb5308, + 0xf03d93eebc589f88,0x793555ab7eba27ca, + 0x96267c7535b763b5,0x4bc1558b2f3458de, + 0xbbb01b9283253ca2,0x9eb1aaedfb016f16, + 0xea9c227723ee8bcb,0x465e15a979c1cadc, + 0x92a1958a7675175f,0xbfacd89ec191ec9, + 0xb749faed14125d36,0xcef980ec671f667b, + 0xe51c79a85916f484,0x82b7e12780e7401a, + 0x8f31cc0937ae58d2,0xd1b2ecb8b0908810, + 0xb2fe3f0b8599ef07,0x861fa7e6dcb4aa15, + 0xdfbdcece67006ac9,0x67a791e093e1d49a, + 0x8bd6a141006042bd,0xe0c8bb2c5c6d24e0, + 0xaecc49914078536d,0x58fae9f773886e18, + 0xda7f5bf590966848,0xaf39a475506a899e, + 0x888f99797a5e012d,0x6d8406c952429603, + 0xaab37fd7d8f58178,0xc8e5087ba6d33b83, + 0xd5605fcdcf32e1d6,0xfb1e4a9a90880a64, + 0x855c3be0a17fcd26,0x5cf2eea09a55067f, + 0xa6b34ad8c9dfc06f,0xf42faa48c0ea481e, + 0xd0601d8efc57b08b,0xf13b94daf124da26, + 0x823c12795db6ce57,0x76c53d08d6b70858, + 0xa2cb1717b52481ed,0x54768c4b0c64ca6e, + 0xcb7ddcdda26da268,0xa9942f5dcf7dfd09, + 0xfe5d54150b090b02,0xd3f93b35435d7c4c, + 0x9efa548d26e5a6e1,0xc47bc5014a1a6daf, + 0xc6b8e9b0709f109a,0x359ab6419ca1091b, + 0xf867241c8cc6d4c0,0xc30163d203c94b62, + 0x9b407691d7fc44f8,0x79e0de63425dcf1d, + 0xc21094364dfb5636,0x985915fc12f542e4, + 0xf294b943e17a2bc4,0x3e6f5b7b17b2939d, + 0x979cf3ca6cec5b5a,0xa705992ceecf9c42, + 0xbd8430bd08277231,0x50c6ff782a838353, + 0xece53cec4a314ebd,0xa4f8bf5635246428, + 0x940f4613ae5ed136,0x871b7795e136be99, + 0xb913179899f68584,0x28e2557b59846e3f, + 0xe757dd7ec07426e5,0x331aeada2fe589cf, + 0x9096ea6f3848984f,0x3ff0d2c85def7621, + 0xb4bca50b065abe63,0xfed077a756b53a9, + 0xe1ebce4dc7f16dfb,0xd3e8495912c62894, + 0x8d3360f09cf6e4bd,0x64712dd7abbbd95c, + 0xb080392cc4349dec,0xbd8d794d96aacfb3, + 0xdca04777f541c567,0xecf0d7a0fc5583a0, + 0x89e42caaf9491b60,0xf41686c49db57244, + 0xac5d37d5b79b6239,0x311c2875c522ced5, + 0xd77485cb25823ac7,0x7d633293366b828b, + 0x86a8d39ef77164bc,0xae5dff9c02033197, + 0xa8530886b54dbdeb,0xd9f57f830283fdfc, + 0xd267caa862a12d66,0xd072df63c324fd7b, + 0x8380dea93da4bc60,0x4247cb9e59f71e6d, + 0xa46116538d0deb78,0x52d9be85f074e608, + 0xcd795be870516656,0x67902e276c921f8b, + 0x806bd9714632dff6,0xba1cd8a3db53b6, + 0xa086cfcd97bf97f3,0x80e8a40eccd228a4, + 0xc8a883c0fdaf7df0,0x6122cd128006b2cd, + 0xfad2a4b13d1b5d6c,0x796b805720085f81, + 0x9cc3a6eec6311a63,0xcbe3303674053bb0, + 0xc3f490aa77bd60fc,0xbedbfc4411068a9c, + 0xf4f1b4d515acb93b,0xee92fb5515482d44, + 0x991711052d8bf3c5,0x751bdd152d4d1c4a, + 0xbf5cd54678eef0b6,0xd262d45a78a0635d, + 0xef340a98172aace4,0x86fb897116c87c34, + 0x9580869f0e7aac0e,0xd45d35e6ae3d4da0, + 0xbae0a846d2195712,0x8974836059cca109, + 0xe998d258869facd7,0x2bd1a438703fc94b, + 0x91ff83775423cc06,0x7b6306a34627ddcf, + 0xb67f6455292cbf08,0x1a3bc84c17b1d542, + 0xe41f3d6a7377eeca,0x20caba5f1d9e4a93, + 0x8e938662882af53e,0x547eb47b7282ee9c, + 0xb23867fb2a35b28d,0xe99e619a4f23aa43, + 0xdec681f9f4c31f31,0x6405fa00e2ec94d4, + 0x8b3c113c38f9f37e,0xde83bc408dd3dd04, + 0xae0b158b4738705e,0x9624ab50b148d445, + 0xd98ddaee19068c76,0x3badd624dd9b0957, + 0x87f8a8d4cfa417c9,0xe54ca5d70a80e5d6, + 0xa9f6d30a038d1dbc,0x5e9fcf4ccd211f4c, + 0xd47487cc8470652b,0x7647c3200069671f, + 0x84c8d4dfd2c63f3b,0x29ecd9f40041e073, + 0xa5fb0a17c777cf09,0xf468107100525890, + 0xcf79cc9db955c2cc,0x7182148d4066eeb4, + 0x81ac1fe293d599bf,0xc6f14cd848405530, + 0xa21727db38cb002f,0xb8ada00e5a506a7c, + 0xca9cf1d206fdc03b,0xa6d90811f0e4851c, + 0xfd442e4688bd304a,0x908f4a166d1da663, + 0x9e4a9cec15763e2e,0x9a598e4e043287fe, + 0xc5dd44271ad3cdba,0x40eff1e1853f29fd, + 0xf7549530e188c128,0xd12bee59e68ef47c, + 0x9a94dd3e8cf578b9,0x82bb74f8301958ce, + 0xc13a148e3032d6e7,0xe36a52363c1faf01, + 0xf18899b1bc3f8ca1,0xdc44e6c3cb279ac1, + 0x96f5600f15a7b7e5,0x29ab103a5ef8c0b9, + 0xbcb2b812db11a5de,0x7415d448f6b6f0e7, + 0xebdf661791d60f56,0x111b495b3464ad21, + 0x936b9fcebb25c995,0xcab10dd900beec34, + 0xb84687c269ef3bfb,0x3d5d514f40eea742, + 0xe65829b3046b0afa,0xcb4a5a3112a5112, + 0x8ff71a0fe2c2e6dc,0x47f0e785eaba72ab, + 0xb3f4e093db73a093,0x59ed216765690f56, + 0xe0f218b8d25088b8,0x306869c13ec3532c, + 0x8c974f7383725573,0x1e414218c73a13fb, + 0xafbd2350644eeacf,0xe5d1929ef90898fa, + 0xdbac6c247d62a583,0xdf45f746b74abf39, + 0x894bc396ce5da772,0x6b8bba8c328eb783, + 0xab9eb47c81f5114f,0x66ea92f3f326564, + 0xd686619ba27255a2,0xc80a537b0efefebd, + 0x8613fd0145877585,0xbd06742ce95f5f36, + 0xa798fc4196e952e7,0x2c48113823b73704, + 0xd17f3b51fca3a7a0,0xf75a15862ca504c5, + 0x82ef85133de648c4,0x9a984d73dbe722fb, + 0xa3ab66580d5fdaf5,0xc13e60d0d2e0ebba, + 0xcc963fee10b7d1b3,0x318df905079926a8, + 0xffbbcfe994e5c61f,0xfdf17746497f7052, + 0x9fd561f1fd0f9bd3,0xfeb6ea8bedefa633, + 0xc7caba6e7c5382c8,0xfe64a52ee96b8fc0, + 0xf9bd690a1b68637b,0x3dfdce7aa3c673b0, + 0x9c1661a651213e2d,0x6bea10ca65c084e, + 0xc31bfa0fe5698db8,0x486e494fcff30a62, + 0xf3e2f893dec3f126,0x5a89dba3c3efccfa, + 0x986ddb5c6b3a76b7,0xf89629465a75e01c, + 0xbe89523386091465,0xf6bbb397f1135823, + 0xee2ba6c0678b597f,0x746aa07ded582e2c, + 0x94db483840b717ef,0xa8c2a44eb4571cdc, + 0xba121a4650e4ddeb,0x92f34d62616ce413, + 0xe896a0d7e51e1566,0x77b020baf9c81d17, + 0x915e2486ef32cd60,0xace1474dc1d122e, + 0xb5b5ada8aaff80b8,0xd819992132456ba, + 0xe3231912d5bf60e6,0x10e1fff697ed6c69, + 0x8df5efabc5979c8f,0xca8d3ffa1ef463c1, + 0xb1736b96b6fd83b3,0xbd308ff8a6b17cb2, + 0xddd0467c64bce4a0,0xac7cb3f6d05ddbde, + 0x8aa22c0dbef60ee4,0x6bcdf07a423aa96b, + 0xad4ab7112eb3929d,0x86c16c98d2c953c6, + 0xd89d64d57a607744,0xe871c7bf077ba8b7, + 0x87625f056c7c4a8b,0x11471cd764ad4972, + 0xa93af6c6c79b5d2d,0xd598e40d3dd89bcf, + 0xd389b47879823479,0x4aff1d108d4ec2c3, + 0x843610cb4bf160cb,0xcedf722a585139ba, + 0xa54394fe1eedb8fe,0xc2974eb4ee658828, + 0xce947a3da6a9273e,0x733d226229feea32, + 0x811ccc668829b887,0x806357d5a3f525f, + 0xa163ff802a3426a8,0xca07c2dcb0cf26f7, + 0xc9bcff6034c13052,0xfc89b393dd02f0b5, + 0xfc2c3f3841f17c67,0xbbac2078d443ace2, + 0x9d9ba7832936edc0,0xd54b944b84aa4c0d, + 0xc5029163f384a931,0xa9e795e65d4df11, + 0xf64335bcf065d37d,0x4d4617b5ff4a16d5, + 0x99ea0196163fa42e,0x504bced1bf8e4e45, + 0xc06481fb9bcf8d39,0xe45ec2862f71e1d6, + 0xf07da27a82c37088,0x5d767327bb4e5a4c, + 0x964e858c91ba2655,0x3a6a07f8d510f86f, + 0xbbe226efb628afea,0x890489f70a55368b, + 0xeadab0aba3b2dbe5,0x2b45ac74ccea842e, + 0x92c8ae6b464fc96f,0x3b0b8bc90012929d, + 0xb77ada0617e3bbcb,0x9ce6ebb40173744, + 0xe55990879ddcaabd,0xcc420a6a101d0515, + 0x8f57fa54c2a9eab6,0x9fa946824a12232d, + 0xb32df8e9f3546564,0x47939822dc96abf9, + 0xdff9772470297ebd,0x59787e2b93bc56f7, + 0x8bfbea76c619ef36,0x57eb4edb3c55b65a, + 0xaefae51477a06b03,0xede622920b6b23f1, + 0xdab99e59958885c4,0xe95fab368e45eced, + 0x88b402f7fd75539b,0x11dbcb0218ebb414, + 0xaae103b5fcd2a881,0xd652bdc29f26a119, + 0xd59944a37c0752a2,0x4be76d3346f0495f, + 0x857fcae62d8493a5,0x6f70a4400c562ddb, + 0xa6dfbd9fb8e5b88e,0xcb4ccd500f6bb952, + 0xd097ad07a71f26b2,0x7e2000a41346a7a7, + 0x825ecc24c873782f,0x8ed400668c0c28c8, + 0xa2f67f2dfa90563b,0x728900802f0f32fa, + 0xcbb41ef979346bca,0x4f2b40a03ad2ffb9, + 0xfea126b7d78186bc,0xe2f610c84987bfa8, + 0x9f24b832e6b0f436,0xdd9ca7d2df4d7c9, + 0xc6ede63fa05d3143,0x91503d1c79720dbb, + 0xf8a95fcf88747d94,0x75a44c6397ce912a, + 0x9b69dbe1b548ce7c,0xc986afbe3ee11aba, + 0xc24452da229b021b,0xfbe85badce996168, + 0xf2d56790ab41c2a2,0xfae27299423fb9c3, + 0x97c560ba6b0919a5,0xdccd879fc967d41a, + 0xbdb6b8e905cb600f,0x5400e987bbc1c920, + 0xed246723473e3813,0x290123e9aab23b68, + 0x9436c0760c86e30b,0xf9a0b6720aaf6521, + 0xb94470938fa89bce,0xf808e40e8d5b3e69, + 0xe7958cb87392c2c2,0xb60b1d1230b20e04, + 0x90bd77f3483bb9b9,0xb1c6f22b5e6f48c2, + 0xb4ecd5f01a4aa828,0x1e38aeb6360b1af3, + 0xe2280b6c20dd5232,0x25c6da63c38de1b0, + 0x8d590723948a535f,0x579c487e5a38ad0e, + 0xb0af48ec79ace837,0x2d835a9df0c6d851, + 0xdcdb1b2798182244,0xf8e431456cf88e65, + 0x8a08f0f8bf0f156b,0x1b8e9ecb641b58ff, + 0xac8b2d36eed2dac5,0xe272467e3d222f3f, + 0xd7adf884aa879177,0x5b0ed81dcc6abb0f, + 0x86ccbb52ea94baea,0x98e947129fc2b4e9, + 0xa87fea27a539e9a5,0x3f2398d747b36224, + 0xd29fe4b18e88640e,0x8eec7f0d19a03aad, + 0x83a3eeeef9153e89,0x1953cf68300424ac, + 0xa48ceaaab75a8e2b,0x5fa8c3423c052dd7, + 0xcdb02555653131b6,0x3792f412cb06794d, + 0x808e17555f3ebf11,0xe2bbd88bbee40bd0, + 0xa0b19d2ab70e6ed6,0x5b6aceaeae9d0ec4, + 0xc8de047564d20a8b,0xf245825a5a445275, + 0xfb158592be068d2e,0xeed6e2f0f0d56712, + 0x9ced737bb6c4183d,0x55464dd69685606b, + 0xc428d05aa4751e4c,0xaa97e14c3c26b886, + 0xf53304714d9265df,0xd53dd99f4b3066a8, + 0x993fe2c6d07b7fab,0xe546a8038efe4029, + 0xbf8fdb78849a5f96,0xde98520472bdd033, + 0xef73d256a5c0f77c,0x963e66858f6d4440, + 0x95a8637627989aad,0xdde7001379a44aa8, + 0xbb127c53b17ec159,0x5560c018580d5d52, + 0xe9d71b689dde71af,0xaab8f01e6e10b4a6, + 0x9226712162ab070d,0xcab3961304ca70e8, + 0xb6b00d69bb55c8d1,0x3d607b97c5fd0d22, + 0xe45c10c42a2b3b05,0x8cb89a7db77c506a, + 0x8eb98a7a9a5b04e3,0x77f3608e92adb242, + 0xb267ed1940f1c61c,0x55f038b237591ed3, + 0xdf01e85f912e37a3,0x6b6c46dec52f6688, + 0x8b61313bbabce2c6,0x2323ac4b3b3da015, + 0xae397d8aa96c1b77,0xabec975e0a0d081a, + 0xd9c7dced53c72255,0x96e7bd358c904a21, + 0x881cea14545c7575,0x7e50d64177da2e54, + 0xaa242499697392d2,0xdde50bd1d5d0b9e9, + 0xd4ad2dbfc3d07787,0x955e4ec64b44e864, + 0x84ec3c97da624ab4,0xbd5af13bef0b113e, + 0xa6274bbdd0fadd61,0xecb1ad8aeacdd58e, + 0xcfb11ead453994ba,0x67de18eda5814af2, + 0x81ceb32c4b43fcf4,0x80eacf948770ced7, + 0xa2425ff75e14fc31,0xa1258379a94d028d, + 0xcad2f7f5359a3b3e,0x96ee45813a04330, + 0xfd87b5f28300ca0d,0x8bca9d6e188853fc, + 0x9e74d1b791e07e48,0x775ea264cf55347e, + 0xc612062576589dda,0x95364afe032a81a0, + 0xf79687aed3eec551,0x3a83ddbd83f52210, + 0x9abe14cd44753b52,0xc4926a9672793580, + 0xc16d9a0095928a27,0x75b7053c0f178400, + 0xf1c90080baf72cb1,0x5324c68b12dd6800, + 0x971da05074da7bee,0xd3f6fc16ebca8000, + 0xbce5086492111aea,0x88f4bb1ca6bd0000, + 0xec1e4a7db69561a5,0x2b31e9e3d0700000, + 0x9392ee8e921d5d07,0x3aff322e62600000, + 0xb877aa3236a4b449,0x9befeb9fad487c3, + 0xe69594bec44de15b,0x4c2ebe687989a9b4, + 0x901d7cf73ab0acd9,0xf9d37014bf60a11, + 0xb424dc35095cd80f,0x538484c19ef38c95, + 0xe12e13424bb40e13,0x2865a5f206b06fba, + 0x8cbccc096f5088cb,0xf93f87b7442e45d4, + 0xafebff0bcb24aafe,0xf78f69a51539d749, + 0xdbe6fecebdedd5be,0xb573440e5a884d1c, + 0x89705f4136b4a597,0x31680a88f8953031, + 0xabcc77118461cefc,0xfdc20d2b36ba7c3e, + 0xd6bf94d5e57a42bc,0x3d32907604691b4d, + 0x8637bd05af6c69b5,0xa63f9a49c2c1b110, + 0xa7c5ac471b478423,0xfcf80dc33721d54, + 0xd1b71758e219652b,0xd3c36113404ea4a9, + 0x83126e978d4fdf3b,0x645a1cac083126ea, + 0xa3d70a3d70a3d70a,0x3d70a3d70a3d70a4, + 0xcccccccccccccccc,0xcccccccccccccccd, + 0x8000000000000000,0x0, + 0xa000000000000000,0x0, + 0xc800000000000000,0x0, + 0xfa00000000000000,0x0, + 0x9c40000000000000,0x0, + 0xc350000000000000,0x0, + 0xf424000000000000,0x0, + 0x9896800000000000,0x0, + 0xbebc200000000000,0x0, + 0xee6b280000000000,0x0, + 0x9502f90000000000,0x0, + 0xba43b74000000000,0x0, + 0xe8d4a51000000000,0x0, + 0x9184e72a00000000,0x0, + 0xb5e620f480000000,0x0, + 0xe35fa931a0000000,0x0, + 0x8e1bc9bf04000000,0x0, + 0xb1a2bc2ec5000000,0x0, + 0xde0b6b3a76400000,0x0, + 0x8ac7230489e80000,0x0, + 0xad78ebc5ac620000,0x0, + 0xd8d726b7177a8000,0x0, + 0x878678326eac9000,0x0, + 0xa968163f0a57b400,0x0, + 0xd3c21bcecceda100,0x0, + 0x84595161401484a0,0x0, + 0xa56fa5b99019a5c8,0x0, + 0xcecb8f27f4200f3a,0x0, + 0x813f3978f8940984,0x4000000000000000, + 0xa18f07d736b90be5,0x5000000000000000, + 0xc9f2c9cd04674ede,0xa400000000000000, + 0xfc6f7c4045812296,0x4d00000000000000, + 0x9dc5ada82b70b59d,0xf020000000000000, + 0xc5371912364ce305,0x6c28000000000000, + 0xf684df56c3e01bc6,0xc732000000000000, + 0x9a130b963a6c115c,0x3c7f400000000000, + 0xc097ce7bc90715b3,0x4b9f100000000000, + 0xf0bdc21abb48db20,0x1e86d40000000000, + 0x96769950b50d88f4,0x1314448000000000, + 0xbc143fa4e250eb31,0x17d955a000000000, + 0xeb194f8e1ae525fd,0x5dcfab0800000000, + 0x92efd1b8d0cf37be,0x5aa1cae500000000, + 0xb7abc627050305ad,0xf14a3d9e40000000, + 0xe596b7b0c643c719,0x6d9ccd05d0000000, + 0x8f7e32ce7bea5c6f,0xe4820023a2000000, + 0xb35dbf821ae4f38b,0xdda2802c8a800000, + 0xe0352f62a19e306e,0xd50b2037ad200000, + 0x8c213d9da502de45,0x4526f422cc340000, + 0xaf298d050e4395d6,0x9670b12b7f410000, + 0xdaf3f04651d47b4c,0x3c0cdd765f114000, + 0x88d8762bf324cd0f,0xa5880a69fb6ac800, + 0xab0e93b6efee0053,0x8eea0d047a457a00, + 0xd5d238a4abe98068,0x72a4904598d6d880, + 0x85a36366eb71f041,0x47a6da2b7f864750, + 0xa70c3c40a64e6c51,0x999090b65f67d924, + 0xd0cf4b50cfe20765,0xfff4b4e3f741cf6d, + 0x82818f1281ed449f,0xbff8f10e7a8921a4, + 0xa321f2d7226895c7,0xaff72d52192b6a0d, + 0xcbea6f8ceb02bb39,0x9bf4f8a69f764490, + 0xfee50b7025c36a08,0x2f236d04753d5b4, + 0x9f4f2726179a2245,0x1d762422c946590, + 0xc722f0ef9d80aad6,0x424d3ad2b7b97ef5, + 0xf8ebad2b84e0d58b,0xd2e0898765a7deb2, + 0x9b934c3b330c8577,0x63cc55f49f88eb2f, + 0xc2781f49ffcfa6d5,0x3cbf6b71c76b25fb, + 0xf316271c7fc3908a,0x8bef464e3945ef7a, + 0x97edd871cfda3a56,0x97758bf0e3cbb5ac, + 0xbde94e8e43d0c8ec,0x3d52eeed1cbea317, + 0xed63a231d4c4fb27,0x4ca7aaa863ee4bdd, + 0x945e455f24fb1cf8,0x8fe8caa93e74ef6a, + 0xb975d6b6ee39e436,0xb3e2fd538e122b44, + 0xe7d34c64a9c85d44,0x60dbbca87196b616, + 0x90e40fbeea1d3a4a,0xbc8955e946fe31cd, + 0xb51d13aea4a488dd,0x6babab6398bdbe41, + 0xe264589a4dcdab14,0xc696963c7eed2dd1, + 0x8d7eb76070a08aec,0xfc1e1de5cf543ca2, + 0xb0de65388cc8ada8,0x3b25a55f43294bcb, + 0xdd15fe86affad912,0x49ef0eb713f39ebe, + 0x8a2dbf142dfcc7ab,0x6e3569326c784337, + 0xacb92ed9397bf996,0x49c2c37f07965404, + 0xd7e77a8f87daf7fb,0xdc33745ec97be906, + 0x86f0ac99b4e8dafd,0x69a028bb3ded71a3, + 0xa8acd7c0222311bc,0xc40832ea0d68ce0c, + 0xd2d80db02aabd62b,0xf50a3fa490c30190, + 0x83c7088e1aab65db,0x792667c6da79e0fa, + 0xa4b8cab1a1563f52,0x577001b891185938, + 0xcde6fd5e09abcf26,0xed4c0226b55e6f86, + 0x80b05e5ac60b6178,0x544f8158315b05b4, + 0xa0dc75f1778e39d6,0x696361ae3db1c721, + 0xc913936dd571c84c,0x3bc3a19cd1e38e9, + 0xfb5878494ace3a5f,0x4ab48a04065c723, + 0x9d174b2dcec0e47b,0x62eb0d64283f9c76, + 0xc45d1df942711d9a,0x3ba5d0bd324f8394, + 0xf5746577930d6500,0xca8f44ec7ee36479, + 0x9968bf6abbe85f20,0x7e998b13cf4e1ecb, + 0xbfc2ef456ae276e8,0x9e3fedd8c321a67e, + 0xefb3ab16c59b14a2,0xc5cfe94ef3ea101e, + 0x95d04aee3b80ece5,0xbba1f1d158724a12, + 0xbb445da9ca61281f,0x2a8a6e45ae8edc97, + 0xea1575143cf97226,0xf52d09d71a3293bd, + 0x924d692ca61be758,0x593c2626705f9c56, + 0xb6e0c377cfa2e12e,0x6f8b2fb00c77836c, + 0xe498f455c38b997a,0xb6dfb9c0f956447, + 0x8edf98b59a373fec,0x4724bd4189bd5eac, + 0xb2977ee300c50fe7,0x58edec91ec2cb657, + 0xdf3d5e9bc0f653e1,0x2f2967b66737e3ed, + 0x8b865b215899f46c,0xbd79e0d20082ee74, + 0xae67f1e9aec07187,0xecd8590680a3aa11, + 0xda01ee641a708de9,0xe80e6f4820cc9495, + 0x884134fe908658b2,0x3109058d147fdcdd, + 0xaa51823e34a7eede,0xbd4b46f0599fd415, + 0xd4e5e2cdc1d1ea96,0x6c9e18ac7007c91a, + 0x850fadc09923329e,0x3e2cf6bc604ddb0, + 0xa6539930bf6bff45,0x84db8346b786151c, + 0xcfe87f7cef46ff16,0xe612641865679a63, + 0x81f14fae158c5f6e,0x4fcb7e8f3f60c07e, + 0xa26da3999aef7749,0xe3be5e330f38f09d, + 0xcb090c8001ab551c,0x5cadf5bfd3072cc5, + 0xfdcb4fa002162a63,0x73d9732fc7c8f7f6, + 0x9e9f11c4014dda7e,0x2867e7fddcdd9afa, + 0xc646d63501a1511d,0xb281e1fd541501b8, + 0xf7d88bc24209a565,0x1f225a7ca91a4226, + 0x9ae757596946075f,0x3375788de9b06958, + 0xc1a12d2fc3978937,0x52d6b1641c83ae, + 0xf209787bb47d6b84,0xc0678c5dbd23a49a, + 0x9745eb4d50ce6332,0xf840b7ba963646e0, + 0xbd176620a501fbff,0xb650e5a93bc3d898, + 0xec5d3fa8ce427aff,0xa3e51f138ab4cebe, + 0x93ba47c980e98cdf,0xc66f336c36b10137, + 0xb8a8d9bbe123f017,0xb80b0047445d4184, + 0xe6d3102ad96cec1d,0xa60dc059157491e5, + 0x9043ea1ac7e41392,0x87c89837ad68db2f, + 0xb454e4a179dd1877,0x29babe4598c311fb, + 0xe16a1dc9d8545e94,0xf4296dd6fef3d67a, + 0x8ce2529e2734bb1d,0x1899e4a65f58660c, + 0xb01ae745b101e9e4,0x5ec05dcff72e7f8f, + 0xdc21a1171d42645d,0x76707543f4fa1f73, + 0x899504ae72497eba,0x6a06494a791c53a8, + 0xabfa45da0edbde69,0x487db9d17636892, + 0xd6f8d7509292d603,0x45a9d2845d3c42b6, + 0x865b86925b9bc5c2,0xb8a2392ba45a9b2, + 0xa7f26836f282b732,0x8e6cac7768d7141e, + 0xd1ef0244af2364ff,0x3207d795430cd926, + 0x8335616aed761f1f,0x7f44e6bd49e807b8, + 0xa402b9c5a8d3a6e7,0x5f16206c9c6209a6, + 0xcd036837130890a1,0x36dba887c37a8c0f, + 0x802221226be55a64,0xc2494954da2c9789, + 0xa02aa96b06deb0fd,0xf2db9baa10b7bd6c, + 0xc83553c5c8965d3d,0x6f92829494e5acc7, + 0xfa42a8b73abbf48c,0xcb772339ba1f17f9, + 0x9c69a97284b578d7,0xff2a760414536efb, + 0xc38413cf25e2d70d,0xfef5138519684aba, + 0xf46518c2ef5b8cd1,0x7eb258665fc25d69, + 0x98bf2f79d5993802,0xef2f773ffbd97a61, + 0xbeeefb584aff8603,0xaafb550ffacfd8fa, + 0xeeaaba2e5dbf6784,0x95ba2a53f983cf38, + 0x952ab45cfa97a0b2,0xdd945a747bf26183, + 0xba756174393d88df,0x94f971119aeef9e4, + 0xe912b9d1478ceb17,0x7a37cd5601aab85d, + 0x91abb422ccb812ee,0xac62e055c10ab33a, + 0xb616a12b7fe617aa,0x577b986b314d6009, + 0xe39c49765fdf9d94,0xed5a7e85fda0b80b, + 0x8e41ade9fbebc27d,0x14588f13be847307, + 0xb1d219647ae6b31c,0x596eb2d8ae258fc8, + 0xde469fbd99a05fe3,0x6fca5f8ed9aef3bb, + 0x8aec23d680043bee,0x25de7bb9480d5854, + 0xada72ccc20054ae9,0xaf561aa79a10ae6a, + 0xd910f7ff28069da4,0x1b2ba1518094da04, + 0x87aa9aff79042286,0x90fb44d2f05d0842, + 0xa99541bf57452b28,0x353a1607ac744a53, + 0xd3fa922f2d1675f2,0x42889b8997915ce8, + 0x847c9b5d7c2e09b7,0x69956135febada11, + 0xa59bc234db398c25,0x43fab9837e699095, + 0xcf02b2c21207ef2e,0x94f967e45e03f4bb, + 0x8161afb94b44f57d,0x1d1be0eebac278f5, + 0xa1ba1ba79e1632dc,0x6462d92a69731732, + 0xca28a291859bbf93,0x7d7b8f7503cfdcfe, + 0xfcb2cb35e702af78,0x5cda735244c3d43e, + 0x9defbf01b061adab,0x3a0888136afa64a7, + 0xc56baec21c7a1916,0x88aaa1845b8fdd0, + 0xf6c69a72a3989f5b,0x8aad549e57273d45, + 0x9a3c2087a63f6399,0x36ac54e2f678864b, + 0xc0cb28a98fcf3c7f,0x84576a1bb416a7dd, + 0xf0fdf2d3f3c30b9f,0x656d44a2a11c51d5, + 0x969eb7c47859e743,0x9f644ae5a4b1b325, + 0xbc4665b596706114,0x873d5d9f0dde1fee, + 0xeb57ff22fc0c7959,0xa90cb506d155a7ea, + 0x9316ff75dd87cbd8,0x9a7f12442d588f2, + 0xb7dcbf5354e9bece,0xc11ed6d538aeb2f, + 0xe5d3ef282a242e81,0x8f1668c8a86da5fa, + 0x8fa475791a569d10,0xf96e017d694487bc, + 0xb38d92d760ec4455,0x37c981dcc395a9ac, + 0xe070f78d3927556a,0x85bbe253f47b1417, + 0x8c469ab843b89562,0x93956d7478ccec8e, + 0xaf58416654a6babb,0x387ac8d1970027b2, + 0xdb2e51bfe9d0696a,0x6997b05fcc0319e, + 0x88fcf317f22241e2,0x441fece3bdf81f03, + 0xab3c2fddeeaad25a,0xd527e81cad7626c3, + 0xd60b3bd56a5586f1,0x8a71e223d8d3b074, + 0x85c7056562757456,0xf6872d5667844e49, + 0xa738c6bebb12d16c,0xb428f8ac016561db, + 0xd106f86e69d785c7,0xe13336d701beba52, + 0x82a45b450226b39c,0xecc0024661173473, + 0xa34d721642b06084,0x27f002d7f95d0190, + 0xcc20ce9bd35c78a5,0x31ec038df7b441f4, + 0xff290242c83396ce,0x7e67047175a15271, + 0x9f79a169bd203e41,0xf0062c6e984d386, + 0xc75809c42c684dd1,0x52c07b78a3e60868, + 0xf92e0c3537826145,0xa7709a56ccdf8a82, + 0x9bbcc7a142b17ccb,0x88a66076400bb691, + 0xc2abf989935ddbfe,0x6acff893d00ea435, + 0xf356f7ebf83552fe,0x583f6b8c4124d43, + 0x98165af37b2153de,0xc3727a337a8b704a, + 0xbe1bf1b059e9a8d6,0x744f18c0592e4c5c, + 0xeda2ee1c7064130c,0x1162def06f79df73, + 0x9485d4d1c63e8be7,0x8addcb5645ac2ba8, + 0xb9a74a0637ce2ee1,0x6d953e2bd7173692, + 0xe8111c87c5c1ba99,0xc8fa8db6ccdd0437, + 0x910ab1d4db9914a0,0x1d9c9892400a22a2, + 0xb54d5e4a127f59c8,0x2503beb6d00cab4b, + 0xe2a0b5dc971f303a,0x2e44ae64840fd61d, + 0x8da471a9de737e24,0x5ceaecfed289e5d2, + 0xb10d8e1456105dad,0x7425a83e872c5f47, + 0xdd50f1996b947518,0xd12f124e28f77719, + 0x8a5296ffe33cc92f,0x82bd6b70d99aaa6f, + 0xace73cbfdc0bfb7b,0x636cc64d1001550b, + 0xd8210befd30efa5a,0x3c47f7e05401aa4e, + 0x8714a775e3e95c78,0x65acfaec34810a71, + 0xa8d9d1535ce3b396,0x7f1839a741a14d0d, + 0xd31045a8341ca07c,0x1ede48111209a050, + 0x83ea2b892091e44d,0x934aed0aab460432, + 0xa4e4b66b68b65d60,0xf81da84d5617853f, + 0xce1de40642e3f4b9,0x36251260ab9d668e, + 0x80d2ae83e9ce78f3,0xc1d72b7c6b426019, + 0xa1075a24e4421730,0xb24cf65b8612f81f, + 0xc94930ae1d529cfc,0xdee033f26797b627, + 0xfb9b7cd9a4a7443c,0x169840ef017da3b1, + 0x9d412e0806e88aa5,0x8e1f289560ee864e, + 0xc491798a08a2ad4e,0xf1a6f2bab92a27e2, + 0xf5b5d7ec8acb58a2,0xae10af696774b1db, + 0x9991a6f3d6bf1765,0xacca6da1e0a8ef29, + 0xbff610b0cc6edd3f,0x17fd090a58d32af3, + 0xeff394dcff8a948e,0xddfc4b4cef07f5b0, + 0x95f83d0a1fb69cd9,0x4abdaf101564f98e, + 0xbb764c4ca7a4440f,0x9d6d1ad41abe37f1, + 0xea53df5fd18d5513,0x84c86189216dc5ed, + 0x92746b9be2f8552c,0x32fd3cf5b4e49bb4, + 0xb7118682dbb66a77,0x3fbc8c33221dc2a1, + 0xe4d5e82392a40515,0xfabaf3feaa5334a, + 0x8f05b1163ba6832d,0x29cb4d87f2a7400e, + 0xb2c71d5bca9023f8,0x743e20e9ef511012, + 0xdf78e4b2bd342cf6,0x914da9246b255416, + 0x8bab8eefb6409c1a,0x1ad089b6c2f7548e, + 0xae9672aba3d0c320,0xa184ac2473b529b1, + 0xda3c0f568cc4f3e8,0xc9e5d72d90a2741e, + 0x8865899617fb1871,0x7e2fa67c7a658892, + 0xaa7eebfb9df9de8d,0xddbb901b98feeab7, + 0xd51ea6fa85785631,0x552a74227f3ea565, + 0x8533285c936b35de,0xd53a88958f87275f, + 0xa67ff273b8460356,0x8a892abaf368f137, + 0xd01fef10a657842c,0x2d2b7569b0432d85, + 0x8213f56a67f6b29b,0x9c3b29620e29fc73, + 0xa298f2c501f45f42,0x8349f3ba91b47b8f, + 0xcb3f2f7642717713,0x241c70a936219a73, + 0xfe0efb53d30dd4d7,0xed238cd383aa0110, + 0x9ec95d1463e8a506,0xf4363804324a40aa, + 0xc67bb4597ce2ce48,0xb143c6053edcd0d5, + 0xf81aa16fdc1b81da,0xdd94b7868e94050a, + 0x9b10a4e5e9913128,0xca7cf2b4191c8326, + 0xc1d4ce1f63f57d72,0xfd1c2f611f63a3f0, + 0xf24a01a73cf2dccf,0xbc633b39673c8cec, + 0x976e41088617ca01,0xd5be0503e085d813, + 0xbd49d14aa79dbc82,0x4b2d8644d8a74e18, + 0xec9c459d51852ba2,0xddf8e7d60ed1219e, + 0x93e1ab8252f33b45,0xcabb90e5c942b503, + 0xb8da1662e7b00a17,0x3d6a751f3b936243, + 0xe7109bfba19c0c9d,0xcc512670a783ad4, + 0x906a617d450187e2,0x27fb2b80668b24c5, + 0xb484f9dc9641e9da,0xb1f9f660802dedf6, + 0xe1a63853bbd26451,0x5e7873f8a0396973, + 0x8d07e33455637eb2,0xdb0b487b6423e1e8, + 0xb049dc016abc5e5f,0x91ce1a9a3d2cda62, + 0xdc5c5301c56b75f7,0x7641a140cc7810fb, + 0x89b9b3e11b6329ba,0xa9e904c87fcb0a9d, + 0xac2820d9623bf429,0x546345fa9fbdcd44, + 0xd732290fbacaf133,0xa97c177947ad4095, + 0x867f59a9d4bed6c0,0x49ed8eabcccc485d, + 0xa81f301449ee8c70,0x5c68f256bfff5a74, + 0xd226fc195c6a2f8c,0x73832eec6fff3111, + 0x83585d8fd9c25db7,0xc831fd53c5ff7eab, + 0xa42e74f3d032f525,0xba3e7ca8b77f5e55, + 0xcd3a1230c43fb26f,0x28ce1bd2e55f35eb, + 0x80444b5e7aa7cf85,0x7980d163cf5b81b3, + 0xa0555e361951c366,0xd7e105bcc332621f, + 0xc86ab5c39fa63440,0x8dd9472bf3fefaa7, + 0xfa856334878fc150,0xb14f98f6f0feb951, + 0x9c935e00d4b9d8d2,0x6ed1bf9a569f33d3, + 0xc3b8358109e84f07,0xa862f80ec4700c8, + 0xf4a642e14c6262c8,0xcd27bb612758c0fa, + 0x98e7e9cccfbd7dbd,0x8038d51cb897789c, + 0xbf21e44003acdd2c,0xe0470a63e6bd56c3, + 0xeeea5d5004981478,0x1858ccfce06cac74, + 0x95527a5202df0ccb,0xf37801e0c43ebc8, + 0xbaa718e68396cffd,0xd30560258f54e6ba, + 0xe950df20247c83fd,0x47c6b82ef32a2069, + 0x91d28b7416cdd27e,0x4cdc331d57fa5441, + 0xb6472e511c81471d,0xe0133fe4adf8e952, + 0xe3d8f9e563a198e5,0x58180fddd97723a6, + 0x8e679c2f5e44ff8f,0x570f09eaa7ea7648,}; +}; +#endif // SIMDJSON_STATIC_REFLECTION + +extern SIMDJSON_DLLIMPORTEXPORT const uint64_t power_of_five_128[651*2]; } // namespace internal } // namespace simdjson @@ -5048,8 +6035,6 @@ SIMDJSON_DLLIMPORTEXPORT const double simdjson::internal::power_of_ten[] = { * exactly using the binary notation, only the powers of five * affect the binary significand. */ - - // The truncated powers of five from 5^-342 all the way to 5^308 // The mantissa is truncated to 128 bits, and // never rounded up. Uses about 10KB. @@ -5706,6 +6691,7 @@ SIMDJSON_DLLIMPORTEXPORT const uint64_t simdjson::internal::power_of_five_128[]= 0xe3d8f9e563a198e5,0x58180fddd97723a6, 0x8e679c2f5e44ff8f,0x570f09eaa7ea7648,}; + #endif // SIMDJSON_SRC_NUMBERPARSING_TABLES_CPP /* end file internal/numberparsing_tables.cpp */ /* including internal/simdprune_tables.cpp: #include */ @@ -8178,6 +9164,12 @@ namespace { tmp = vpaddq_u8(tmp, tmp); return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); } + // Returns 4-bit out of each byte, alternating between the high 4 bits and low + // bits result it is 64 bit. + simdjson_inline uint64_t to_bitmask64() const { + return vget_lane_u64( + vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(*this), 4)), 0); + } simdjson_inline bool any() const { return vmaxvq_u32(vreinterpretq_u32_u8(*this)) != 0; } }; @@ -8254,7 +9246,7 @@ namespace { // Bit-specific operations simdjson_inline simd8 any_bits_set(simd8 bits) const { return vtstq_u8(*this, bits); } - simdjson_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; } + simdjson_inline bool any_bits_set_anywhere() const { return vmaxvq_u32(vreinterpretq_u32_u8(*this)) != 0; } simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return (*this & bits).any_bits_set_anywhere(); } template simdjson_inline simd8 shr() const { return vshrq_n_u8(*this, N); } @@ -8267,7 +9259,12 @@ namespace { return lookup_table.apply_lookup_16_to(*this); } - + // Returns 4-bit out of each byte, alternating between the high 4 bits and low + // bits result it is 64 bit. + simdjson_inline uint64_t to_bitmask64() const { + return vget_lane_u64( + vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(*this), 4)), 0); + } // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). // Passing a 0 value for mask would be equivalent to writing out every byte to output. // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes @@ -8561,7 +9558,7 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return bs_bits != 0; } @@ -8590,6 +9587,32 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits) / 4; } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + (is_backslash | is_quote | is_control).to_bitmask64() + }; +} + + + } // unnamed namespace } // namespace arm64 } // namespace simdjson @@ -9009,12 +10032,17 @@ struct implementation_simdjson_result_base { * * @param value The variable to assign the value to. May not be set if there is an error. */ - simdjson_inline error_code get(T &value) && noexcept; + simdjson_warn_unused simdjson_inline error_code get(T &value) && noexcept; /** * The error. */ - simdjson_inline error_code error() const noexcept; + simdjson_warn_unused simdjson_inline error_code error() const noexcept; + + /** + * Whether there is a value. + */ + simdjson_warn_unused simdjson_inline bool has_value() const noexcept; #if SIMDJSON_EXCEPTIONS @@ -9023,6 +10051,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -9064,6 +10102,10 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + + using value_type = T; + using error_type = error_code; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -9246,7 +10288,12 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index]); +#else simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); +#endif + // Both i and power_of_five_128[index] have their most significant bit set to 1 which // implies that the either the most or the second most significant bit of the product // is 1. We pack values in this manner for efficiency reasons: it maximizes the use @@ -9279,7 +10326,11 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index + 1]); +#else simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); +#endif firstproduct.low += secondproduct.high; if(secondproduct.high > firstproduct.low) { firstproduct.high++; } // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without @@ -9440,7 +10491,7 @@ simdjson_inline bool is_digit(const uint8_t c) { return static_cast(c - '0') <= 9; } -simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the @@ -9468,7 +10519,7 @@ simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const u return SUCCESS; } -simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well @@ -9557,7 +10608,7 @@ static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double /** @private */ template -simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { +simdjson_warn_unused simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. @@ -9620,13 +10671,13 @@ simdjson_inline error_code write_float(const uint8_t *const src, bool negative, // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); // for performance analysis, it is sometimes useful to skip parsing #ifdef SIMDJSON_SKIPNUMBERPARSING template -simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } @@ -9652,7 +10703,7 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // // Check for minus sign // @@ -9726,7 +10777,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -10187,6 +11247,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -10421,12 +11487,41 @@ simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_b } template -simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { return this->second; } + +template +simdjson_warn_unused simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -10945,6 +12040,12 @@ namespace { tmp = vpaddq_u8(tmp, tmp); return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); } + // Returns 4-bit out of each byte, alternating between the high 4 bits and low + // bits result it is 64 bit. + simdjson_inline uint64_t to_bitmask64() const { + return vget_lane_u64( + vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(*this), 4)), 0); + } simdjson_inline bool any() const { return vmaxvq_u32(vreinterpretq_u32_u8(*this)) != 0; } }; @@ -11021,7 +12122,7 @@ namespace { // Bit-specific operations simdjson_inline simd8 any_bits_set(simd8 bits) const { return vtstq_u8(*this, bits); } - simdjson_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; } + simdjson_inline bool any_bits_set_anywhere() const { return vmaxvq_u32(vreinterpretq_u32_u8(*this)) != 0; } simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return (*this & bits).any_bits_set_anywhere(); } template simdjson_inline simd8 shr() const { return vshrq_n_u8(*this, N); } @@ -11034,7 +12135,12 @@ namespace { return lookup_table.apply_lookup_16_to(*this); } - + // Returns 4-bit out of each byte, alternating between the high 4 bits and low + // bits result it is 64 bit. + simdjson_inline uint64_t to_bitmask64() const { + return vget_lane_u64( + vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(*this), 4)), 0); + } // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). // Passing a 0 value for mask would be equivalent to writing out every byte to output. // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes @@ -11328,7 +12434,7 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return bs_bits != 0; } @@ -11357,6 +12463,32 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits) / 4; } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + (is_backslash | is_quote | is_control).to_bitmask64() + }; +} + + + } // unnamed namespace } // namespace arm64 } // namespace simdjson @@ -12064,7 +13196,7 @@ using namespace simd; } } // do not forget to call check_eof! - simdjson_inline error_code errors() { + simdjson_warn_unused simdjson_inline error_code errors() { return this->error.any_bits_set_anywhere() ? error_code::UTF8_ERROR : error_code::SUCCESS; } @@ -12189,7 +13321,7 @@ class json_scanner { json_scanner() = default; simdjson_inline json_block next(const simd::simd8x64& in); // Returns either UNCLOSED_STRING or SUCCESS - simdjson_inline error_code finish(); + simdjson_warn_unused simdjson_inline error_code finish(); private: // Whether the last character of the previous iteration is part of a scalar token @@ -12237,7 +13369,7 @@ simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) ); } -simdjson_inline error_code json_scanner::finish() { +simdjson_warn_unused simdjson_inline error_code json_scanner::finish() { return string_scanner.finish(); } @@ -12391,7 +13523,7 @@ class json_minifier { template simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; simdjson_inline void next(const simd::simd8x64& in, const json_block& block); - simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + simdjson_warn_unused simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); json_scanner scanner{}; uint8_t *dst; }; @@ -12401,7 +13533,7 @@ simdjson_inline void json_minifier::next(const simd::simd8x64& in, cons dst += in.compress(mask, dst); } -simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { +simdjson_warn_unused simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { error_code error = scanner.finish(); if (error) { dst_len = 0; return error; } dst_len = dst - dst_start; @@ -12610,7 +13742,7 @@ class json_structural_indexer { template simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); - simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + simdjson_warn_unused simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); json_scanner scanner{}; utf8_checker checker{}; @@ -13462,6 +14594,7 @@ simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V /* end file generic/stage2/json_iterator.h for arm64 */ /* including generic/stage2/stringparsing.h for arm64: #include */ /* begin file generic/stage2/stringparsing.h for arm64 */ +#include #ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -13614,7 +14747,8 @@ simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { while (1) { // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + auto b = backslash_and_quote{}; + auto bs_quote = b.copy_and_find(src, dst); // If the next thing is the end quote, copy and return if (bs_quote.has_quote_first()) { // we encountered quotes first. Move dst to point to quotes and exit @@ -13659,7 +14793,8 @@ simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t // It is not ideal that this function is nearly identical to parse_string. while (1) { // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + auto b = backslash_and_quote{}; + auto bs_quote = b.copy_and_find(src, dst); // If the next thing is the end quote, copy and return if (bs_quote.has_quote_first()) { // we encountered quotes first. Move dst to point to quotes and exit @@ -13701,6 +14836,7 @@ simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t } } // namespace stringparsing + } // unnamed namespace } // namespace arm64 } // namespace simdjson @@ -14928,7 +16064,7 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } @@ -14952,6 +16088,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + uint64_t((is_backslash | is_quote | is_control).to_bitmask()) + }; +} + } // unnamed namespace } // namespace haswell } // namespace simdjson @@ -15369,12 +16530,17 @@ struct implementation_simdjson_result_base { * * @param value The variable to assign the value to. May not be set if there is an error. */ - simdjson_inline error_code get(T &value) && noexcept; + simdjson_warn_unused simdjson_inline error_code get(T &value) && noexcept; /** * The error. */ - simdjson_inline error_code error() const noexcept; + simdjson_warn_unused simdjson_inline error_code error() const noexcept; + + /** + * Whether there is a value. + */ + simdjson_warn_unused simdjson_inline bool has_value() const noexcept; #if SIMDJSON_EXCEPTIONS @@ -15383,6 +16549,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -15424,6 +16600,10 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + + using value_type = T; + using error_type = error_code; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -15606,7 +16786,12 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index]); +#else simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); +#endif + // Both i and power_of_five_128[index] have their most significant bit set to 1 which // implies that the either the most or the second most significant bit of the product // is 1. We pack values in this manner for efficiency reasons: it maximizes the use @@ -15639,7 +16824,11 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index + 1]); +#else simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); +#endif firstproduct.low += secondproduct.high; if(secondproduct.high > firstproduct.low) { firstproduct.high++; } // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without @@ -15800,7 +16989,7 @@ simdjson_inline bool is_digit(const uint8_t c) { return static_cast(c - '0') <= 9; } -simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the @@ -15828,7 +17017,7 @@ simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const u return SUCCESS; } -simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well @@ -15917,7 +17106,7 @@ static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double /** @private */ template -simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { +simdjson_warn_unused simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. @@ -15980,13 +17169,13 @@ simdjson_inline error_code write_float(const uint8_t *const src, bool negative, // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); // for performance analysis, it is sometimes useful to skip parsing #ifdef SIMDJSON_SKIPNUMBERPARSING template -simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } @@ -16012,7 +17201,7 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // // Check for minus sign // @@ -16086,7 +17275,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -16547,6 +17745,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -16781,12 +17985,41 @@ simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_b } template -simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { return this->second; } + +template +simdjson_warn_unused simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -17566,7 +18799,7 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } @@ -17590,6 +18823,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + uint64_t((is_backslash | is_quote | is_control).to_bitmask()) + }; +} + } // unnamed namespace } // namespace haswell } // namespace simdjson @@ -18295,7 +19553,7 @@ using namespace simd; } } // do not forget to call check_eof! - simdjson_inline error_code errors() { + simdjson_warn_unused simdjson_inline error_code errors() { return this->error.any_bits_set_anywhere() ? error_code::UTF8_ERROR : error_code::SUCCESS; } @@ -18420,7 +19678,7 @@ class json_scanner { json_scanner() = default; simdjson_inline json_block next(const simd::simd8x64& in); // Returns either UNCLOSED_STRING or SUCCESS - simdjson_inline error_code finish(); + simdjson_warn_unused simdjson_inline error_code finish(); private: // Whether the last character of the previous iteration is part of a scalar token @@ -18468,7 +19726,7 @@ simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) ); } -simdjson_inline error_code json_scanner::finish() { +simdjson_warn_unused simdjson_inline error_code json_scanner::finish() { return string_scanner.finish(); } @@ -18622,7 +19880,7 @@ class json_minifier { template simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; simdjson_inline void next(const simd::simd8x64& in, const json_block& block); - simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + simdjson_warn_unused simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); json_scanner scanner{}; uint8_t *dst; }; @@ -18632,7 +19890,7 @@ simdjson_inline void json_minifier::next(const simd::simd8x64& in, cons dst += in.compress(mask, dst); } -simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { +simdjson_warn_unused simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { error_code error = scanner.finish(); if (error) { dst_len = 0; return error; } dst_len = dst - dst_start; @@ -18841,7 +20099,7 @@ class json_structural_indexer { template simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); - simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + simdjson_warn_unused simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); json_scanner scanner{}; utf8_checker checker{}; @@ -19693,6 +20951,7 @@ simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V /* end file generic/stage2/json_iterator.h for haswell */ /* including generic/stage2/stringparsing.h for haswell: #include */ /* begin file generic/stage2/stringparsing.h for haswell */ +#include #ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -19845,7 +21104,8 @@ simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { while (1) { // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + auto b = backslash_and_quote{}; + auto bs_quote = b.copy_and_find(src, dst); // If the next thing is the end quote, copy and return if (bs_quote.has_quote_first()) { // we encountered quotes first. Move dst to point to quotes and exit @@ -19890,7 +21150,8 @@ simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t // It is not ideal that this function is nearly identical to parse_string. while (1) { // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + auto b = backslash_and_quote{}; + auto bs_quote = b.copy_and_find(src, dst); // If the next thing is the end quote, copy and return if (bs_quote.has_quote_first()) { // we encountered quotes first. Move dst to point to quotes and exit @@ -19932,6 +21193,7 @@ simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t } } // namespace stringparsing + } // unnamed namespace } // namespace haswell } // namespace simdjson @@ -20773,7 +22035,6 @@ namespace simd { friend simdjson_really_inline uint64_t operator==(const simd8 lhs, const simd8 rhs) { return _mm512_cmpeq_epi8_mask(lhs, rhs); } - static const int SIZE = sizeof(base::value); template @@ -21092,7 +22353,7 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 64; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } @@ -21116,6 +22377,35 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 64; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(uint64_t(escape_bits)); } + + __mmask64 escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + __mmask64 is_quote = _mm512_cmpeq_epi8_mask(v, _mm512_set1_epi8('"')); + __mmask64 is_backslash = _mm512_cmpeq_epi8_mask(v, _mm512_set1_epi8('\\')); + __mmask64 is_control = _mm512_cmplt_epi8_mask(v, _mm512_set1_epi8(32)); + return { + (is_backslash | is_quote | is_control) + }; +} + + + + } // unnamed namespace } // namespace icelake } // namespace simdjson @@ -21593,12 +22883,17 @@ struct implementation_simdjson_result_base { * * @param value The variable to assign the value to. May not be set if there is an error. */ - simdjson_inline error_code get(T &value) && noexcept; + simdjson_warn_unused simdjson_inline error_code get(T &value) && noexcept; /** * The error. */ - simdjson_inline error_code error() const noexcept; + simdjson_warn_unused simdjson_inline error_code error() const noexcept; + + /** + * Whether there is a value. + */ + simdjson_warn_unused simdjson_inline bool has_value() const noexcept; #if SIMDJSON_EXCEPTIONS @@ -21607,6 +22902,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -21648,6 +22953,10 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + + using value_type = T; + using error_type = error_code; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -21830,7 +23139,12 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index]); +#else simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); +#endif + // Both i and power_of_five_128[index] have their most significant bit set to 1 which // implies that the either the most or the second most significant bit of the product // is 1. We pack values in this manner for efficiency reasons: it maximizes the use @@ -21863,7 +23177,11 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index + 1]); +#else simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); +#endif firstproduct.low += secondproduct.high; if(secondproduct.high > firstproduct.low) { firstproduct.high++; } // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without @@ -22024,7 +23342,7 @@ simdjson_inline bool is_digit(const uint8_t c) { return static_cast(c - '0') <= 9; } -simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the @@ -22052,7 +23370,7 @@ simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const u return SUCCESS; } -simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well @@ -22141,7 +23459,7 @@ static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double /** @private */ template -simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { +simdjson_warn_unused simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. @@ -22204,13 +23522,13 @@ simdjson_inline error_code write_float(const uint8_t *const src, bool negative, // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); // for performance analysis, it is sometimes useful to skip parsing #ifdef SIMDJSON_SKIPNUMBERPARSING template -simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } @@ -22236,7 +23554,7 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // // Check for minus sign // @@ -22310,7 +23628,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -22771,6 +24098,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -23005,12 +24338,41 @@ simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_b } template -simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { return this->second; } + +template +simdjson_warn_unused simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -23407,7 +24769,6 @@ namespace simd { friend simdjson_really_inline uint64_t operator==(const simd8 lhs, const simd8 rhs) { return _mm512_cmpeq_epi8_mask(lhs, rhs); } - static const int SIZE = sizeof(base::value); template @@ -23726,7 +25087,7 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 64; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } @@ -23750,6 +25111,35 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 64; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(uint64_t(escape_bits)); } + + __mmask64 escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + __mmask64 is_quote = _mm512_cmpeq_epi8_mask(v, _mm512_set1_epi8('"')); + __mmask64 is_backslash = _mm512_cmpeq_epi8_mask(v, _mm512_set1_epi8('\\')); + __mmask64 is_control = _mm512_cmplt_epi8_mask(v, _mm512_set1_epi8(32)); + return { + (is_backslash | is_quote | is_control) + }; +} + + + + } // unnamed namespace } // namespace icelake } // namespace simdjson @@ -24515,7 +25905,7 @@ using namespace simd; } } // do not forget to call check_eof! - simdjson_inline error_code errors() { + simdjson_warn_unused simdjson_inline error_code errors() { return this->error.any_bits_set_anywhere() ? error_code::UTF8_ERROR : error_code::SUCCESS; } @@ -24640,7 +26030,7 @@ class json_scanner { json_scanner() = default; simdjson_inline json_block next(const simd::simd8x64& in); // Returns either UNCLOSED_STRING or SUCCESS - simdjson_inline error_code finish(); + simdjson_warn_unused simdjson_inline error_code finish(); private: // Whether the last character of the previous iteration is part of a scalar token @@ -24688,7 +26078,7 @@ simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) ); } -simdjson_inline error_code json_scanner::finish() { +simdjson_warn_unused simdjson_inline error_code json_scanner::finish() { return string_scanner.finish(); } @@ -24842,7 +26232,7 @@ class json_minifier { template simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; simdjson_inline void next(const simd::simd8x64& in, const json_block& block); - simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + simdjson_warn_unused simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); json_scanner scanner{}; uint8_t *dst; }; @@ -24852,7 +26242,7 @@ simdjson_inline void json_minifier::next(const simd::simd8x64& in, cons dst += in.compress(mask, dst); } -simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { +simdjson_warn_unused simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { error_code error = scanner.finish(); if (error) { dst_len = 0; return error; } dst_len = dst - dst_start; @@ -25061,7 +26451,7 @@ class json_structural_indexer { template simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); - simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + simdjson_warn_unused simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); json_scanner scanner{}; utf8_checker checker{}; @@ -25913,6 +27303,7 @@ simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V /* end file generic/stage2/json_iterator.h for icelake */ /* including generic/stage2/stringparsing.h for icelake: #include */ /* begin file generic/stage2/stringparsing.h for icelake */ +#include #ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -26065,7 +27456,8 @@ simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { while (1) { // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + auto b = backslash_and_quote{}; + auto bs_quote = b.copy_and_find(src, dst); // If the next thing is the end quote, copy and return if (bs_quote.has_quote_first()) { // we encountered quotes first. Move dst to point to quotes and exit @@ -26110,7 +27502,8 @@ simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t // It is not ideal that this function is nearly identical to parse_string. while (1) { // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + auto b = backslash_and_quote{}; + auto bs_quote = b.copy_and_find(src, dst); // If the next thing is the end quote, copy and return if (bs_quote.has_quote_first()) { // we encountered quotes first. Move dst to point to quotes and exit @@ -26152,6 +27545,7 @@ simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t } } // namespace stringparsing + } // unnamed namespace } // namespace icelake } // namespace simdjson @@ -27513,7 +28907,7 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { @@ -27554,6 +28948,32 @@ backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + // We store it as a 64-bit bitmask even though we only need 16 bits. + uint64_t((is_backslash | is_quote | is_control).to_bitmask()) + }; +} + } // unnamed namespace } // namespace ppc64 } // namespace simdjson @@ -27973,12 +29393,17 @@ struct implementation_simdjson_result_base { * * @param value The variable to assign the value to. May not be set if there is an error. */ - simdjson_inline error_code get(T &value) && noexcept; + simdjson_warn_unused simdjson_inline error_code get(T &value) && noexcept; /** * The error. */ - simdjson_inline error_code error() const noexcept; + simdjson_warn_unused simdjson_inline error_code error() const noexcept; + + /** + * Whether there is a value. + */ + simdjson_warn_unused simdjson_inline bool has_value() const noexcept; #if SIMDJSON_EXCEPTIONS @@ -27987,6 +29412,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -28028,6 +29463,10 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + + using value_type = T; + using error_type = error_code; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -28210,7 +29649,12 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index]); +#else simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); +#endif + // Both i and power_of_five_128[index] have their most significant bit set to 1 which // implies that the either the most or the second most significant bit of the product // is 1. We pack values in this manner for efficiency reasons: it maximizes the use @@ -28243,7 +29687,11 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index + 1]); +#else simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); +#endif firstproduct.low += secondproduct.high; if(secondproduct.high > firstproduct.low) { firstproduct.high++; } // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without @@ -28404,7 +29852,7 @@ simdjson_inline bool is_digit(const uint8_t c) { return static_cast(c - '0') <= 9; } -simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the @@ -28432,7 +29880,7 @@ simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const u return SUCCESS; } -simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well @@ -28521,7 +29969,7 @@ static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double /** @private */ template -simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { +simdjson_warn_unused simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. @@ -28584,13 +30032,13 @@ simdjson_inline error_code write_float(const uint8_t *const src, bool negative, // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); // for performance analysis, it is sometimes useful to skip parsing #ifdef SIMDJSON_SKIPNUMBERPARSING template -simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } @@ -28616,7 +30064,7 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // // Check for minus sign // @@ -28690,7 +30138,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -29151,6 +30608,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -29385,12 +30848,41 @@ simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_b } template -simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { return this->second; } + +template +simdjson_warn_unused simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -30262,7 +31754,7 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { @@ -30303,6 +31795,32 @@ backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + // We store it as a 64-bit bitmask even though we only need 16 bits. + uint64_t((is_backslash | is_quote | is_control).to_bitmask()) + }; +} + } // unnamed namespace } // namespace ppc64 } // namespace simdjson @@ -31010,7 +32528,7 @@ using namespace simd; } } // do not forget to call check_eof! - simdjson_inline error_code errors() { + simdjson_warn_unused simdjson_inline error_code errors() { return this->error.any_bits_set_anywhere() ? error_code::UTF8_ERROR : error_code::SUCCESS; } @@ -31135,7 +32653,7 @@ class json_scanner { json_scanner() = default; simdjson_inline json_block next(const simd::simd8x64& in); // Returns either UNCLOSED_STRING or SUCCESS - simdjson_inline error_code finish(); + simdjson_warn_unused simdjson_inline error_code finish(); private: // Whether the last character of the previous iteration is part of a scalar token @@ -31183,7 +32701,7 @@ simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) ); } -simdjson_inline error_code json_scanner::finish() { +simdjson_warn_unused simdjson_inline error_code json_scanner::finish() { return string_scanner.finish(); } @@ -31337,7 +32855,7 @@ class json_minifier { template simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; simdjson_inline void next(const simd::simd8x64& in, const json_block& block); - simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + simdjson_warn_unused simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); json_scanner scanner{}; uint8_t *dst; }; @@ -31347,7 +32865,7 @@ simdjson_inline void json_minifier::next(const simd::simd8x64& in, cons dst += in.compress(mask, dst); } -simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { +simdjson_warn_unused simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { error_code error = scanner.finish(); if (error) { dst_len = 0; return error; } dst_len = dst - dst_start; @@ -31556,7 +33074,7 @@ class json_structural_indexer { template simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); - simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + simdjson_warn_unused simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); json_scanner scanner{}; utf8_checker checker{}; @@ -32408,6 +33926,7 @@ simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V /* end file generic/stage2/json_iterator.h for ppc64 */ /* including generic/stage2/stringparsing.h for ppc64: #include */ /* begin file generic/stage2/stringparsing.h for ppc64 */ +#include #ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -32560,7 +34079,8 @@ simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { while (1) { // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + auto b = backslash_and_quote{}; + auto bs_quote = b.copy_and_find(src, dst); // If the next thing is the end quote, copy and return if (bs_quote.has_quote_first()) { // we encountered quotes first. Move dst to point to quotes and exit @@ -32605,7 +34125,8 @@ simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t // It is not ideal that this function is nearly identical to parse_string. while (1) { // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + auto b = backslash_and_quote{}; + auto bs_quote = b.copy_and_find(src, dst); // If the next thing is the end quote, copy and return if (bs_quote.has_quote_first()) { // we encountered quotes first. Move dst to point to quotes and exit @@ -32647,6 +34168,7 @@ simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t } } // namespace stringparsing + } // unnamed namespace } // namespace ppc64 } // namespace simdjson @@ -34272,7 +35794,7 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return bs_bits != 0; } @@ -34298,6 +35820,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + uint64_t((is_backslash | is_quote | is_control).to_bitmask()) + }; +} + } // unnamed namespace } // namespace westmere } // namespace simdjson @@ -34715,12 +36262,17 @@ struct implementation_simdjson_result_base { * * @param value The variable to assign the value to. May not be set if there is an error. */ - simdjson_inline error_code get(T &value) && noexcept; + simdjson_warn_unused simdjson_inline error_code get(T &value) && noexcept; /** * The error. */ - simdjson_inline error_code error() const noexcept; + simdjson_warn_unused simdjson_inline error_code error() const noexcept; + + /** + * Whether there is a value. + */ + simdjson_warn_unused simdjson_inline bool has_value() const noexcept; #if SIMDJSON_EXCEPTIONS @@ -34729,6 +36281,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -34770,6 +36332,10 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + + using value_type = T; + using error_type = error_code; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -34952,7 +36518,12 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index]); +#else simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); +#endif + // Both i and power_of_five_128[index] have their most significant bit set to 1 which // implies that the either the most or the second most significant bit of the product // is 1. We pack values in this manner for efficiency reasons: it maximizes the use @@ -34985,7 +36556,11 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index + 1]); +#else simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); +#endif firstproduct.low += secondproduct.high; if(secondproduct.high > firstproduct.low) { firstproduct.high++; } // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without @@ -35146,7 +36721,7 @@ simdjson_inline bool is_digit(const uint8_t c) { return static_cast(c - '0') <= 9; } -simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the @@ -35174,7 +36749,7 @@ simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const u return SUCCESS; } -simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well @@ -35263,7 +36838,7 @@ static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double /** @private */ template -simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { +simdjson_warn_unused simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. @@ -35326,13 +36901,13 @@ simdjson_inline error_code write_float(const uint8_t *const src, bool negative, // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); // for performance analysis, it is sometimes useful to skip parsing #ifdef SIMDJSON_SKIPNUMBERPARSING template -simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } @@ -35358,7 +36933,7 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // // Check for minus sign // @@ -35432,7 +37007,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -35893,6 +37477,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -36127,12 +37717,41 @@ simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_b } template -simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { return this->second; } + +template +simdjson_warn_unused simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -37336,7 +38955,7 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return bs_bits != 0; } @@ -37362,6 +38981,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + uint64_t((is_backslash | is_quote | is_control).to_bitmask()) + }; +} + } // unnamed namespace } // namespace westmere } // namespace simdjson @@ -38067,7 +39711,7 @@ using namespace simd; } } // do not forget to call check_eof! - simdjson_inline error_code errors() { + simdjson_warn_unused simdjson_inline error_code errors() { return this->error.any_bits_set_anywhere() ? error_code::UTF8_ERROR : error_code::SUCCESS; } @@ -38192,7 +39836,7 @@ class json_scanner { json_scanner() = default; simdjson_inline json_block next(const simd::simd8x64& in); // Returns either UNCLOSED_STRING or SUCCESS - simdjson_inline error_code finish(); + simdjson_warn_unused simdjson_inline error_code finish(); private: // Whether the last character of the previous iteration is part of a scalar token @@ -38240,7 +39884,7 @@ simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) ); } -simdjson_inline error_code json_scanner::finish() { +simdjson_warn_unused simdjson_inline error_code json_scanner::finish() { return string_scanner.finish(); } @@ -38394,7 +40038,7 @@ class json_minifier { template simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; simdjson_inline void next(const simd::simd8x64& in, const json_block& block); - simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + simdjson_warn_unused simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); json_scanner scanner{}; uint8_t *dst; }; @@ -38404,7 +40048,7 @@ simdjson_inline void json_minifier::next(const simd::simd8x64& in, cons dst += in.compress(mask, dst); } -simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { +simdjson_warn_unused simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { error_code error = scanner.finish(); if (error) { dst_len = 0; return error; } dst_len = dst - dst_start; @@ -38613,7 +40257,7 @@ class json_structural_indexer { template simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); - simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + simdjson_warn_unused simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); json_scanner scanner{}; utf8_checker checker{}; @@ -39465,6 +41109,7 @@ simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V /* end file generic/stage2/json_iterator.h for westmere */ /* including generic/stage2/stringparsing.h for westmere: #include */ /* begin file generic/stage2/stringparsing.h for westmere */ +#include #ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -39617,7 +41262,8 @@ simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { while (1) { // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + auto b = backslash_and_quote{}; + auto bs_quote = b.copy_and_find(src, dst); // If the next thing is the end quote, copy and return if (bs_quote.has_quote_first()) { // we encountered quotes first. Move dst to point to quotes and exit @@ -39662,7 +41308,8 @@ simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t // It is not ideal that this function is nearly identical to parse_string. while (1) { // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + auto b = backslash_and_quote{}; + auto bs_quote = b.copy_and_find(src, dst); // If the next thing is the end quote, copy and return if (bs_quote.has_quote_first()) { // we encountered quotes first. Move dst to point to quotes and exit @@ -39704,6 +41351,7 @@ simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t } } // namespace stringparsing + } // unnamed namespace } // namespace westmere } // namespace simdjson @@ -40232,7 +41880,6 @@ simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t * if (error) { return error; } return stage2(_doc); } - } // namespace westmere } // namespace simdjson @@ -40833,7 +42480,7 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return bs_bits != 0; } @@ -40862,6 +42509,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + static_cast((is_backslash | is_quote | is_control).to_bitmask()) + }; +} + } // unnamed namespace } // namespace lsx } // namespace simdjson @@ -41281,12 +42953,17 @@ struct implementation_simdjson_result_base { * * @param value The variable to assign the value to. May not be set if there is an error. */ - simdjson_inline error_code get(T &value) && noexcept; + simdjson_warn_unused simdjson_inline error_code get(T &value) && noexcept; /** * The error. */ - simdjson_inline error_code error() const noexcept; + simdjson_warn_unused simdjson_inline error_code error() const noexcept; + + /** + * Whether there is a value. + */ + simdjson_warn_unused simdjson_inline bool has_value() const noexcept; #if SIMDJSON_EXCEPTIONS @@ -41295,6 +42972,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -41336,6 +43023,10 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + + using value_type = T; + using error_type = error_code; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -41518,7 +43209,12 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index]); +#else simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); +#endif + // Both i and power_of_five_128[index] have their most significant bit set to 1 which // implies that the either the most or the second most significant bit of the product // is 1. We pack values in this manner for efficiency reasons: it maximizes the use @@ -41551,7 +43247,11 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index + 1]); +#else simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); +#endif firstproduct.low += secondproduct.high; if(secondproduct.high > firstproduct.low) { firstproduct.high++; } // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without @@ -41712,7 +43412,7 @@ simdjson_inline bool is_digit(const uint8_t c) { return static_cast(c - '0') <= 9; } -simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the @@ -41740,7 +43440,7 @@ simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const u return SUCCESS; } -simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well @@ -41829,7 +43529,7 @@ static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double /** @private */ template -simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { +simdjson_warn_unused simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. @@ -41892,13 +43592,13 @@ simdjson_inline error_code write_float(const uint8_t *const src, bool negative, // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); // for performance analysis, it is sometimes useful to skip parsing #ifdef SIMDJSON_SKIPNUMBERPARSING template -simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } @@ -41924,7 +43624,7 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // // Check for minus sign // @@ -41998,7 +43698,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -42459,6 +44168,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -42693,12 +44408,41 @@ simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_b } template -simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { return this->second; } + +template +simdjson_warn_unused simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -43367,7 +45111,7 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return bs_bits != 0; } @@ -43396,6 +45140,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + static_cast((is_backslash | is_quote | is_control).to_bitmask()) + }; +} + } // unnamed namespace } // namespace lsx } // namespace simdjson @@ -44103,7 +45872,7 @@ using namespace simd; } } // do not forget to call check_eof! - simdjson_inline error_code errors() { + simdjson_warn_unused simdjson_inline error_code errors() { return this->error.any_bits_set_anywhere() ? error_code::UTF8_ERROR : error_code::SUCCESS; } @@ -44228,7 +45997,7 @@ class json_scanner { json_scanner() = default; simdjson_inline json_block next(const simd::simd8x64& in); // Returns either UNCLOSED_STRING or SUCCESS - simdjson_inline error_code finish(); + simdjson_warn_unused simdjson_inline error_code finish(); private: // Whether the last character of the previous iteration is part of a scalar token @@ -44276,7 +46045,7 @@ simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) ); } -simdjson_inline error_code json_scanner::finish() { +simdjson_warn_unused simdjson_inline error_code json_scanner::finish() { return string_scanner.finish(); } @@ -44430,7 +46199,7 @@ class json_minifier { template simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; simdjson_inline void next(const simd::simd8x64& in, const json_block& block); - simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + simdjson_warn_unused simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); json_scanner scanner{}; uint8_t *dst; }; @@ -44440,7 +46209,7 @@ simdjson_inline void json_minifier::next(const simd::simd8x64& in, cons dst += in.compress(mask, dst); } -simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { +simdjson_warn_unused simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { error_code error = scanner.finish(); if (error) { dst_len = 0; return error; } dst_len = dst - dst_start; @@ -44649,7 +46418,7 @@ class json_structural_indexer { template simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); - simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + simdjson_warn_unused simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); json_scanner scanner{}; utf8_checker checker{}; @@ -45501,6 +47270,7 @@ simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V /* end file generic/stage2/json_iterator.h for lsx */ /* including generic/stage2/stringparsing.h for lsx: #include */ /* begin file generic/stage2/stringparsing.h for lsx */ +#include #ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -45653,7 +47423,8 @@ simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { while (1) { // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + auto b = backslash_and_quote{}; + auto bs_quote = b.copy_and_find(src, dst); // If the next thing is the end quote, copy and return if (bs_quote.has_quote_first()) { // we encountered quotes first. Move dst to point to quotes and exit @@ -45698,7 +47469,8 @@ simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t // It is not ideal that this function is nearly identical to parse_string. while (1) { // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + auto b = backslash_and_quote{}; + auto bs_quote = b.copy_and_find(src, dst); // If the next thing is the end quote, copy and return if (bs_quote.has_quote_first()) { // we encountered quotes first. Move dst to point to quotes and exit @@ -45740,6 +47512,7 @@ simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t } } // namespace stringparsing + } // unnamed namespace } // namespace lsx } // namespace simdjson @@ -46850,7 +48623,7 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return bs_bits != 0; } @@ -46873,6 +48646,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + (is_backslash | is_quote | is_control).to_bitmask() + }; +} + } // unnamed namespace } // namespace lasx } // namespace simdjson @@ -47292,12 +49090,17 @@ struct implementation_simdjson_result_base { * * @param value The variable to assign the value to. May not be set if there is an error. */ - simdjson_inline error_code get(T &value) && noexcept; + simdjson_warn_unused simdjson_inline error_code get(T &value) && noexcept; /** * The error. */ - simdjson_inline error_code error() const noexcept; + simdjson_warn_unused simdjson_inline error_code error() const noexcept; + + /** + * Whether there is a value. + */ + simdjson_warn_unused simdjson_inline bool has_value() const noexcept; #if SIMDJSON_EXCEPTIONS @@ -47306,6 +49109,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -47347,6 +49160,10 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + + using value_type = T; + using error_type = error_code; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -47529,7 +49346,12 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index]); +#else simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); +#endif + // Both i and power_of_five_128[index] have their most significant bit set to 1 which // implies that the either the most or the second most significant bit of the product // is 1. We pack values in this manner for efficiency reasons: it maximizes the use @@ -47562,7 +49384,11 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index + 1]); +#else simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); +#endif firstproduct.low += secondproduct.high; if(secondproduct.high > firstproduct.low) { firstproduct.high++; } // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without @@ -47723,7 +49549,7 @@ simdjson_inline bool is_digit(const uint8_t c) { return static_cast(c - '0') <= 9; } -simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the @@ -47751,7 +49577,7 @@ simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const u return SUCCESS; } -simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well @@ -47840,7 +49666,7 @@ static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double /** @private */ template -simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { +simdjson_warn_unused simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. @@ -47903,13 +49729,13 @@ simdjson_inline error_code write_float(const uint8_t *const src, bool negative, // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); // for performance analysis, it is sometimes useful to skip parsing #ifdef SIMDJSON_SKIPNUMBERPARSING template -simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } @@ -47935,7 +49761,7 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // // Check for minus sign // @@ -48009,7 +49835,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -48470,6 +50305,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -48704,12 +50545,41 @@ simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_b } template -simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { return this->second; } + +template +simdjson_warn_unused simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -49400,7 +51270,7 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return bs_bits != 0; } @@ -49423,6 +51293,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + (is_backslash | is_quote | is_control).to_bitmask() + }; +} + } // unnamed namespace } // namespace lasx } // namespace simdjson @@ -50130,7 +52025,7 @@ using namespace simd; } } // do not forget to call check_eof! - simdjson_inline error_code errors() { + simdjson_warn_unused simdjson_inline error_code errors() { return this->error.any_bits_set_anywhere() ? error_code::UTF8_ERROR : error_code::SUCCESS; } @@ -50255,7 +52150,7 @@ class json_scanner { json_scanner() = default; simdjson_inline json_block next(const simd::simd8x64& in); // Returns either UNCLOSED_STRING or SUCCESS - simdjson_inline error_code finish(); + simdjson_warn_unused simdjson_inline error_code finish(); private: // Whether the last character of the previous iteration is part of a scalar token @@ -50303,7 +52198,7 @@ simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) ); } -simdjson_inline error_code json_scanner::finish() { +simdjson_warn_unused simdjson_inline error_code json_scanner::finish() { return string_scanner.finish(); } @@ -50457,7 +52352,7 @@ class json_minifier { template simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; simdjson_inline void next(const simd::simd8x64& in, const json_block& block); - simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + simdjson_warn_unused simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); json_scanner scanner{}; uint8_t *dst; }; @@ -50467,7 +52362,7 @@ simdjson_inline void json_minifier::next(const simd::simd8x64& in, cons dst += in.compress(mask, dst); } -simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { +simdjson_warn_unused simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { error_code error = scanner.finish(); if (error) { dst_len = 0; return error; } dst_len = dst - dst_start; @@ -50676,7 +52571,7 @@ class json_structural_indexer { template simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); - simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + simdjson_warn_unused simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); json_scanner scanner{}; utf8_checker checker{}; @@ -51528,6 +53423,7 @@ simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V /* end file generic/stage2/json_iterator.h for lasx */ /* including generic/stage2/stringparsing.h for lasx: #include */ /* begin file generic/stage2/stringparsing.h for lasx */ +#include #ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -51680,7 +53576,8 @@ simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { while (1) { // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + auto b = backslash_and_quote{}; + auto bs_quote = b.copy_and_find(src, dst); // If the next thing is the end quote, copy and return if (bs_quote.has_quote_first()) { // we encountered quotes first. Move dst to point to quotes and exit @@ -51725,7 +53622,8 @@ simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t // It is not ideal that this function is nearly identical to parse_string. while (1) { // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + auto b = backslash_and_quote{}; + auto bs_quote = b.copy_and_find(src, dst); // If the next thing is the end quote, copy and return if (bs_quote.has_quote_first()) { // we encountered quotes first. Move dst to point to quotes and exit @@ -51767,6 +53665,7 @@ simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t } } // namespace stringparsing + } // unnamed namespace } // namespace lasx } // namespace simdjson @@ -52380,7 +54279,7 @@ namespace { struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 1; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return c == '"'; } simdjson_inline bool has_backslash() { return c == '\\'; } @@ -52396,6 +54295,24 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin return { src[0] }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 1; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits; } + simdjson_inline int escape_index() { return 0; } + + bool escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + dst[0] = src[0]; + return { (src[0] == '\\') || (src[0] == '"') || (src[0] < 32) }; +} + } // unnamed namespace } // namespace fallback } // namespace simdjson @@ -52902,12 +54819,17 @@ struct implementation_simdjson_result_base { * * @param value The variable to assign the value to. May not be set if there is an error. */ - simdjson_inline error_code get(T &value) && noexcept; + simdjson_warn_unused simdjson_inline error_code get(T &value) && noexcept; /** * The error. */ - simdjson_inline error_code error() const noexcept; + simdjson_warn_unused simdjson_inline error_code error() const noexcept; + + /** + * Whether there is a value. + */ + simdjson_warn_unused simdjson_inline bool has_value() const noexcept; #if SIMDJSON_EXCEPTIONS @@ -52916,6 +54838,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -52957,6 +54889,10 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + + using value_type = T; + using error_type = error_code; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -53139,7 +55075,12 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index]); +#else simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); +#endif + // Both i and power_of_five_128[index] have their most significant bit set to 1 which // implies that the either the most or the second most significant bit of the product // is 1. We pack values in this manner for efficiency reasons: it maximizes the use @@ -53172,7 +55113,11 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index + 1]); +#else simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); +#endif firstproduct.low += secondproduct.high; if(secondproduct.high > firstproduct.low) { firstproduct.high++; } // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without @@ -53333,7 +55278,7 @@ simdjson_inline bool is_digit(const uint8_t c) { return static_cast(c - '0') <= 9; } -simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the @@ -53361,7 +55306,7 @@ simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const u return SUCCESS; } -simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well @@ -53450,7 +55395,7 @@ static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double /** @private */ template -simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { +simdjson_warn_unused simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. @@ -53513,13 +55458,13 @@ simdjson_inline error_code write_float(const uint8_t *const src, bool negative, // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); // for performance analysis, it is sometimes useful to skip parsing #ifdef SIMDJSON_SKIPNUMBERPARSING template -simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } @@ -53545,7 +55490,7 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // // Check for minus sign // @@ -53619,7 +55564,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -54080,6 +56034,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -54314,12 +56274,41 @@ simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_b } template -simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { return this->second; } + +template +simdjson_warn_unused simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -54519,7 +56508,7 @@ namespace { struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 1; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return c == '"'; } simdjson_inline bool has_backslash() { return c == '\\'; } @@ -54535,6 +56524,24 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin return { src[0] }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 1; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits; } + simdjson_inline int escape_index() { return 0; } + + bool escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + dst[0] = src[0]; + return { (src[0] == '\\') || (src[0] == '"') || (src[0] < 32) }; +} + } // unnamed namespace } // namespace fallback } // namespace simdjson @@ -54741,6 +56748,7 @@ simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &par /* end file generic/stage1/find_next_document_index.h for fallback */ /* including generic/stage2/stringparsing.h for fallback: #include */ /* begin file generic/stage2/stringparsing.h for fallback */ +#include #ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -54893,7 +56901,8 @@ simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { while (1) { // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + auto b = backslash_and_quote{}; + auto bs_quote = b.copy_and_find(src, dst); // If the next thing is the end quote, copy and return if (bs_quote.has_quote_first()) { // we encountered quotes first. Move dst to point to quotes and exit @@ -54938,7 +56947,8 @@ simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t // It is not ideal that this function is nearly identical to parse_string. while (1) { // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + auto b = backslash_and_quote{}; + auto bs_quote = b.copy_and_find(src, dst); // If the next thing is the end quote, copy and return if (bs_quote.has_quote_first()) { // we encountered quotes first. Move dst to point to quotes and exit @@ -54980,6 +56990,7 @@ simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t } } // namespace stringparsing + } // unnamed namespace } // namespace fallback } // namespace simdjson @@ -55946,10 +57957,78 @@ simdjson_inline void validate_utf8_character() { idx += 4; } +static const uint8_t CHAR_TYPE_SPACE = 1 << 0; +static const uint8_t CHAR_TYPE_OPERATOR = 1 << 1; +static const uint8_t CHAR_TYPE_ESC_ASCII = 1 << 2; +static const uint8_t CHAR_TYPE_NON_ASCII = 1 << 3; + +const uint8_t char_table[256] = { + 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, + 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x04, 0x04, + 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, + 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, + 0x01, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x02, 0x04, 0x02, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08 +}; + +simdjson_inline bool char_is_type(uint8_t c, uint8_t type) { + return (char_table[c] & type); +} + +simdjson_inline bool char_is_space(uint8_t c) { + return char_is_type(c, CHAR_TYPE_SPACE); +} + +simdjson_inline bool char_is_operator(uint8_t c) { + return char_is_type(c, CHAR_TYPE_OPERATOR); +} + +simdjson_inline bool char_is_space_or_operator(uint8_t c) { + return char_is_type(c, CHAR_TYPE_SPACE | CHAR_TYPE_OPERATOR); +} + +simdjson_inline bool char_is_ascii_stop(uint8_t c) { + return char_is_type(c, CHAR_TYPE_ESC_ASCII | CHAR_TYPE_NON_ASCII); +} + // Returns true if the string is unclosed. simdjson_inline bool validate_string() { idx++; // skip first quote - while (idx < len && buf[idx] != '"') { + while (idx < len) { + do { + if (char_is_ascii_stop(buf[idx])) { break; } + idx++; + } while (idx < len); + if (idx >= len) { return true; } + if (buf[idx] == '"') { + return false; + } if (buf[idx] == '\\') { idx += 2; } else if (simdjson_unlikely(buf[idx] & 0x80)) { @@ -55963,43 +58042,31 @@ simdjson_inline bool validate_string() { return false; } -simdjson_inline bool is_whitespace_or_operator(uint8_t c) { - switch (c) { - case '{': case '}': case '[': case ']': case ',': case ':': - case ' ': case '\r': case '\n': case '\t': - return true; - default: - return false; - } -} - // // Parse the entire input in STEP_SIZE-byte chunks. // -simdjson_inline error_code scan() { +simdjson_warn_unused simdjson_inline error_code scan() { bool unclosed_string = false; for (;idx= len) { break; } + // String + if (buf[idx] == '"') { + add_structural(); + unclosed_string |= validate_string(); + // Operator + } else if (char_is_operator(buf[idx])) { + add_structural(); + // Primitive or invalid character (invalid characters will be checked in stage 2) + } else { + // Anything else, add the structural and go until we find the next one + add_structural(); + while (idx+1= 202402L) // update when the standard is finalized +#define SIMDJSON_CPLUSPLUS26 1 +#endif + // C++ 23 #if !defined(SIMDJSON_CPLUSPLUS23) && (SIMDJSON_CPLUSPLUS >= 202302L) #define SIMDJSON_CPLUSPLUS23 1 @@ -97,30 +102,71 @@ #endif #endif +#ifndef SIMDJSON_CONSTEXPR_LAMBDA +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_CONSTEXPR_LAMBDA constexpr +#else +#define SIMDJSON_CONSTEXPR_LAMBDA +#endif +#endif + + + #ifdef __has_include #if __has_include() #include #endif #endif +// The current specification is unclear on how we detect +// static reflection, both __cpp_lib_reflection and +// __cpp_impl_reflection are proposed in the draft specification. +// For now, we disable static reflect by default. It must be +// specified at compiler time. +#ifndef SIMDJSON_STATIC_REFLECTION +#define SIMDJSON_STATIC_REFLECTION 0 // disabled by default. +#endif + #if defined(__apple_build_version__) #if __apple_build_version__ < 14000000 #define SIMDJSON_CONCEPT_DISABLED 1 // apple-clang/13 doesn't support std::convertible_to #endif #endif +#if defined(__cpp_lib_ranges) && __cpp_lib_ranges >= 201911L +#include +#define SIMDJSON_SUPPORTS_RANGES 1 +#else +#define SIMDJSON_SUPPORTS_RANGES 0 +#endif #if defined(__cpp_concepts) && !defined(SIMDJSON_CONCEPT_DISABLED) #if __cpp_concepts >= 201907L #include -#define SIMDJSON_SUPPORTS_DESERIALIZATION 1 +#define SIMDJSON_SUPPORTS_CONCEPTS 1 #else -#define SIMDJSON_SUPPORTS_DESERIALIZATION 0 +#define SIMDJSON_SUPPORTS_CONCEPTS 0 #endif #else // defined(__cpp_concepts) && !defined(SIMDJSON_CONCEPT_DISABLED) -#define SIMDJSON_SUPPORTS_DESERIALIZATION 0 +#define SIMDJSON_SUPPORTS_CONCEPTS 0 #endif // defined(__cpp_concepts) && !defined(SIMDJSON_CONCEPT_DISABLED) +// copy SIMDJSON_SUPPORTS_CONCEPTS to SIMDJSON_SUPPORTS_DESERIALIZATION. +#if SIMDJSON_SUPPORTS_CONCEPTS +#define SIMDJSON_SUPPORTS_DESERIALIZATION 1 +#else +#define SIMDJSON_SUPPORTS_DESERIALIZATION 0 +#endif + + +#if !defined(SIMDJSON_CONSTEVAL) +#if defined(__cpp_consteval) && __cpp_consteval >= 201811L && defined(__cpp_lib_constexpr_string) && __cpp_lib_constexpr_string >= 201907L +#define SIMDJSON_CONSTEVAL 1 +#else +#define SIMDJSON_CONSTEVAL 0 +#endif // defined(__cpp_consteval) && __cpp_consteval >= 201811L && defined(__cpp_lib_constexpr_string) && __cpp_lib_constexpr_string >= 201907L +#endif // !defined(SIMDJSON_CONSTEVAL) + #endif // SIMDJSON_COMPILER_CHECK_H /* end file simdjson/compiler_check.h */ /* including simdjson/portability.h: #include "simdjson/portability.h" */ @@ -172,6 +218,22 @@ using std::size_t; #define SIMDJSON_IS_ARM64 1 #elif defined(__riscv) && __riscv_xlen == 64 #define SIMDJSON_IS_RISCV64 1 + #if __riscv_v_intrinsic >= 11000 + #define SIMDJSON_HAS_RVV_INTRINSICS 1 + #endif + + #define SIMDJSON_HAS_ZVBB_INTRINSICS \ + 0 // there is currently no way to detect this + + #if SIMDJSON_HAS_RVV_INTRINSICS && __riscv_vector && \ + __riscv_v_min_vlen >= 128 && __riscv_v_elen >= 64 + // RISC-V V extension + #define SIMDJSON_IS_RVV 1 + #if SIMDJSON_HAS_ZVBB_INTRINSICS && __riscv_zvbb >= 1000000 + // RISC-V Vector Basic Bit-manipulation + #define SIMDJSON_IS_ZVBB 1 + #endif + #endif #elif defined(__loongarch_lp64) #define SIMDJSON_IS_LOONGARCH64 1 #elif defined(__PPC64__) || defined(_M_PPC64) @@ -318,6 +380,7 @@ using std::size_t; #if defined(NDEBUG) || defined(__OPTIMIZE__) || (defined(_MSC_VER) && !defined(_DEBUG)) // If NDEBUG is set, or __OPTIMIZE__ is set, or we are under MSVC in release mode, // then do away with asserts and use __assume. +// We still recommend that our users set NDEBUG in release mode. #if SIMDJSON_VISUAL_STUDIO #define SIMDJSON_UNREACHABLE() __assume(0) #define SIMDJSON_ASSUME(COND) __assume(COND) @@ -393,7 +456,7 @@ double from_chars(const char *first, const char* end) noexcept; } #ifndef SIMDJSON_EXCEPTIONS -#if __cpp_exceptions +#if defined(__cpp_exceptions) || defined(_CPPUNWIND) #define SIMDJSON_EXCEPTIONS 1 #else #define SIMDJSON_EXCEPTIONS 0 @@ -569,17 +632,6 @@ double from_chars(const char *first, const char* end) noexcept; // We assume by default static linkage #define SIMDJSON_DLLIMPORTEXPORT #endif - -/** - * Workaround for the vcpkg package manager. Only vcpkg should - * ever touch the next line. The SIMDJSON_USING_LIBRARY macro is otherwise unused. - */ -#if SIMDJSON_USING_LIBRARY -#define SIMDJSON_DLLIMPORTEXPORT __declspec(dllimport) -#endif -/** - * End of workaround for the vcpkg package manager. - */ #else #define SIMDJSON_DLLIMPORTEXPORT #endif @@ -596,12 +648,14 @@ double from_chars(const char *first, const char* end) noexcept; // even if we do not have C++17 support. #ifdef __cpp_lib_string_view #define SIMDJSON_HAS_STRING_VIEW +#include #endif // Some systems have string_view even if we do not have C++17 support, // and even if __cpp_lib_string_view is undefined, it is the case // with Apple clang version 11. // We must handle it. *This is important.* +#ifndef _MSC_VER #ifndef SIMDJSON_HAS_STRING_VIEW #if defined __has_include // do not combine the next #if with the previous one (unsafe) @@ -617,6 +671,7 @@ double from_chars(const char *first, const char* end) noexcept; #endif // __has_include () #endif // defined __has_include #endif // def SIMDJSON_HAS_STRING_VIEW +#endif // def _MSC_VER // end of complicated but important routine to try to detect string_view. // @@ -1039,9 +1094,9 @@ using std::operator<<; #endif #if nssv_HAVE_NODISCARD -# define nssv_nodiscard [[nodiscard]] +# define nssv_nodiscard simdjson_warn_unused #else -# define nssv_nodiscard /*[[nodiscard]]*/ +# define nssv_nodiscard /*simdjson_warn_unused*/ #endif // Additional includes: @@ -2359,16 +2414,25 @@ namespace std { // It could also wrongly set SIMDJSON_DEVELOPMENT_CHECKS (e.g., if the programmer // sets _DEBUG in a release build under Visual Studio, or if some compiler fails to // set the __OPTIMIZE__ macro). +// We make it so that if NDEBUG is defined, then SIMDJSON_DEVELOPMENT_CHECKS +// is not defined, irrespective of the compiler. +// We recommend that users set NDEBUG in release builds, so that +// SIMDJSON_DEVELOPMENT_CHECKS is not defined in release builds by default, +// irrespective of the compiler. #ifndef SIMDJSON_DEVELOPMENT_CHECKS #ifdef _MSC_VER // Visual Studio seems to set _DEBUG for debug builds. -#ifdef _DEBUG +// We set SIMDJSON_DEVELOPMENT_CHECKS to 1 if _DEBUG is defined +// and NDEBUG is not defined. +#if defined(_DEBUG) && !defined(NDEBUG) #define SIMDJSON_DEVELOPMENT_CHECKS 1 #endif // _DEBUG #else // _MSC_VER // All other compilers appear to set __OPTIMIZE__ to a positive integer // when the compiler is optimizing. -#ifndef __OPTIMIZE__ +// We only set SIMDJSON_DEVELOPMENT_CHECKS if both __OPTIMIZE__ +// and NDEBUG are not defined. +#if !defined(__OPTIMIZE__) && !defined(NDEBUG) #define SIMDJSON_DEVELOPMENT_CHECKS 1 #endif // __OPTIMIZE__ #endif // _MSC_VER @@ -2424,6 +2488,18 @@ namespace std { #define SIMDJSON_AVX512_ALLOWED 1 #endif + +#ifndef __has_cpp_attribute +#define simdjson_lifetime_bound +#elif __has_cpp_attribute(msvc::lifetimebound) +#define simdjson_lifetime_bound [[msvc::lifetimebound]] +#elif __has_cpp_attribute(clang::lifetimebound) +#define simdjson_lifetime_bound [[clang::lifetimebound]] +#elif __has_cpp_attribute(lifetimebound) +#define simdjson_lifetime_bound [[lifetimebound]] +#else +#define simdjson_lifetime_bound +#endif #endif // SIMDJSON_COMMON_DEFS_H /* end file simdjson/common_defs.h */ @@ -2437,22 +2513,22 @@ namespace std { #define SIMDJSON_SIMDJSON_VERSION_H /** The version of simdjson being used (major.minor.revision) */ -#define SIMDJSON_VERSION "3.12.3" +#define SIMDJSON_VERSION "4.2.4" namespace simdjson { enum { /** * The major version (MAJOR.minor.revision) of simdjson being used. */ - SIMDJSON_VERSION_MAJOR = 3, + SIMDJSON_VERSION_MAJOR = 4, /** * The minor version (major.MINOR.revision) of simdjson being used. */ - SIMDJSON_VERSION_MINOR = 12, + SIMDJSON_VERSION_MINOR = 2, /** * The revision (major.minor.REVISION) of simdjson being used. */ - SIMDJSON_VERSION_REVISION = 3 + SIMDJSON_VERSION_REVISION = 4 }; } // namespace simdjson @@ -2523,7 +2599,8 @@ enum error_code { SCALAR_DOCUMENT_AS_VALUE, ///< A scalar document is treated as a value. OUT_OF_BOUNDS, ///< Attempted to access location outside of document. TRAILING_CONTENT, ///< Unexpected trailing content in the JSON input - NUM_ERROR_CODES + OUT_OF_CAPACITY, ///< The capacity was exceeded, we cannot allocate enough memory. + NUM_ERROR_CODES ///< Placeholder for end of error code list. }; /** @@ -2581,6 +2658,10 @@ namespace internal { /** * The result of a simdjson operation that could fail. * + * IMPORTANT: For the ondemand API, we use implementation_simdjson_result_base as a base class + * to avoid some compilation issue. Thus, if you modify this class, please ensure that the ondemand + * implementation_simdjson_result_base is also modified. + * * Gives the option of reading error codes, or throwing an exception by casting to the desired result. * * This is a base class for implementations that want to add functions to the result type for @@ -2641,8 +2722,27 @@ struct simdjson_result_base : protected std::pair { */ simdjson_inline error_code error() const noexcept; + /** + * Whether there is a value. + */ + simdjson_inline bool has_value() const noexcept; #if SIMDJSON_EXCEPTIONS + /** + * Dereference operator to access the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + /** * Get the result value. * @@ -2670,20 +2770,53 @@ struct simdjson_result_base : protected std::pair { * @throw simdjson_error if there was an error. */ simdjson_inline operator T&&() && noexcept(false); + #endif // SIMDJSON_EXCEPTIONS /** * Get the result value. This function is safe if and only * the error() method returns a value that evaluates to false. + * We discourage the use of value_unsafe(). + * + * The recommended pattern is: + * + * T value; // where T is the type + * auto error = result.get(value); + * if (error) { + * // handle error + * } + * + * Or you may call 'value()' which will raise an exception + * in case of error: + * + * T value = result.value(); */ simdjson_inline const T& value_unsafe() const& noexcept; /** * Take the result value (move it). This function is safe if and only * the error() method returns a value that evaluates to false. + * We discourage the use of value_unsafe(). + * + * The recommended pattern is: + * + * T value; // where T is the type + * auto error = result.get(value); + * if (error) { + * // handle error, return, exit, abort + * } else { + * // use value here. + * } + * + * Or you may call 'value()' which will raise an exception + * in case of error: + * + * T value = result.value(); */ simdjson_inline T&& value_unsafe() && noexcept; + using value_type = T; + using error_type = error_code; }; // struct simdjson_result_base } // namespace internal @@ -2695,6 +2828,7 @@ struct simdjson_result_base : protected std::pair { */ template struct simdjson_result : public internal::simdjson_result_base { + /** * @private Create a new empty result with error = UNINITIALIZED. */ @@ -2727,13 +2861,32 @@ struct simdjson_result : public internal::simdjson_result_base { */ simdjson_warn_unused simdjson_inline error_code get(T &value) && noexcept; + /** + * Copy the value to a provided std::string, only enabled for std::string_view. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + template + simdjson_warn_unused simdjson_inline error_code get(std::string &value) && noexcept { + static_assert(std::is_same::value, "SFINAE"); + std::string_view v; + error_code error = std::forward>(*this).get(v); + if (!error) { + value.assign(v.data(), v.size()); + } + return error; + } + /** * The error. */ simdjson_inline error_code error() const noexcept; -#if SIMDJSON_EXCEPTIONS + +#if SIMDJSON_EXCEPTIONS + using internal::simdjson_result_base::operator*; + using internal::simdjson_result_base::operator->; /** * Get the result value. * @@ -2775,6 +2928,8 @@ struct simdjson_result : public internal::simdjson_result_base { */ simdjson_inline T&& value_unsafe() && noexcept; + using value_type = T; + using error_type = error_code; }; // struct simdjson_result #if SIMDJSON_EXCEPTIONS @@ -2804,7 +2959,7 @@ inline const std::string error_message(int error) noexcept; /* begin file simdjson/concepts.h */ #ifndef SIMDJSON_CONCEPTS_H #define SIMDJSON_CONCEPTS_H -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS #include #include @@ -2836,7 +2991,9 @@ SIMDJSON_IMPL_CONCEPT(op_append, operator+=) #undef SIMDJSON_IMPL_CONCEPT } // namespace details - +template +concept is_pair = requires { typename T::first_type; typename T::second_type; } && + std::same_as>; template concept string_view_like = std::is_convertible_v && !std::is_convertible_v; @@ -2919,21 +3076,166 @@ concept optional_type = requires(std::remove_cvref_t obj) { { obj.value() } -> std::same_as::value_type&>; requires requires(typename std::remove_cvref_t::value_type &&val) { obj.emplace(std::move(val)); - obj = std::move(val); { obj.value_or(val) } -> std::convertible_to::value_type>; }; { static_cast(obj) } -> std::same_as; // convertible to bool + { obj.reset() } noexcept -> std::same_as; +}; + + + +// Types we serialize as JSON strings (not as containers) +template +concept string_like = + std::is_same_v, std::string> || + std::is_same_v, std::string_view> || + std::is_same_v, const char*> || + std::is_same_v, char*>; + +// Concept that checks if a type is a container but not a string (because +// strings handling must be handled differently) +// Now uses iterator-based approach for broader container support +template +concept container_but_not_string = + std::ranges::input_range && !string_like && !concepts::string_view_keyed_map; + + + +// Concept: Indexable container that is not a string or associative container +// Accepts: std::vector, std::array, std::deque (have operator[], value_type, not string_like) +// Rejects: std::string (string_like), std::list (no operator[]), std::map (has key_type) +template +concept indexable_container = requires { + typename Container::value_type; + requires !concepts::string_like; + requires !requires { typename Container::key_type; }; // Reject maps/sets + requires requires(Container& c, std::size_t i) { + { c[i] } -> std::convertible_to; + }; }; +// Variable template to use with std::meta::substitute +template +constexpr bool indexable_container_v = indexable_container; } // namespace concepts + + +/** + * We use tag_invoke as our customization point mechanism. + */ +template +concept tag_invocable = requires(Tag tag, Args... args) { + tag_invoke(std::forward(tag), std::forward(args)...); +}; + +template +concept nothrow_tag_invocable = + tag_invocable && requires(Tag tag, Args... args) { + { + tag_invoke(std::forward(tag), std::forward(args)...) + } noexcept; + }; + } // namespace simdjson -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS #endif // SIMDJSON_CONCEPTS_H /* end file simdjson/concepts.h */ +/* including simdjson/constevalutil.h: #include "simdjson/constevalutil.h" */ +/* begin file simdjson/constevalutil.h */ +#ifndef SIMDJSON_CONSTEVALUTIL_H +#define SIMDJSON_CONSTEVALUTIL_H + +#include +#include +#include + +namespace simdjson { +namespace constevalutil { +#if SIMDJSON_CONSTEVAL + +constexpr static std::array json_quotable_character = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +constexpr static std::array control_chars = { + "\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", "\\u0006", + "\\u0007", "\\b", "\\t", "\\n", "\\u000b", "\\f", "\\r", + "\\u000e", "\\u000f", "\\u0010", "\\u0011", "\\u0012", "\\u0013", "\\u0014", + "\\u0015", "\\u0016", "\\u0017", "\\u0018", "\\u0019", "\\u001a", "\\u001b", + "\\u001c", "\\u001d", "\\u001e", "\\u001f"}; +// unoptimized, meant for compile-time execution +consteval std::string consteval_to_quoted_escaped(std::string_view input) { + std::string out = "\""; + for (char c : input) { + if (json_quotable_character[uint8_t(c)]) { + if (c == '"') { + out.append("\\\""); + } else if (c == '\\') { + out.append("\\\\"); + } else { + std::string_view v = control_chars[uint8_t(c)]; + out.append(v); + } + } else { + out.push_back(c); + } + } + out.push_back('"'); + return out; +} +#endif // SIMDJSON_CONSTEVAL + + +#if SIMDJSON_SUPPORTS_CONCEPTS +template +struct fixed_string { + constexpr fixed_string(const char (&str)[N]) { + for (std::size_t i = 0; i < N; ++i) { + data[i] = str[i]; + } + } + char data[N]; + constexpr std::string_view view() const { return {data, N - 1}; } + constexpr size_t size() const { return N ; } + constexpr operator std::string_view() const { return view(); } + constexpr char operator[](std::size_t index) const { return data[index]; } + constexpr bool operator==(const fixed_string& other) const { + if (N != other.size()) { + return false; + } + for (std::size_t i = 0; i < N; ++i) { + if (data[i] != other.data[i]) { + return false; + } + } + return true; + } +}; +template +fixed_string(const char (&)[N]) -> fixed_string; + +template +struct string_constant { + static constexpr std::string_view value = str.view(); +}; +#endif // SIMDJSON_SUPPORTS_CONCEPTS + +} // namespace constevalutil +} // namespace simdjson +#endif // SIMDJSON_CONSTEVALUTIL_H +/* end file simdjson/constevalutil.h */ /** * @brief The top level simdjson namespace, containing everything the library provides. @@ -3057,8 +3359,38 @@ simdjson_inline error_code simdjson_result_base::error() const noexcept { return this->second; } + +template +simdjson_inline bool simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -3083,6 +3415,7 @@ simdjson_inline simdjson_result_base::operator T&&() && noexcept(false) { #endif // SIMDJSON_EXCEPTIONS + template simdjson_inline const T& simdjson_result_base::value_unsafe() const& noexcept { return this->first; @@ -3118,7 +3451,8 @@ simdjson_inline void simdjson_result::tie(T &value, error_code &error) && noe } template -simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &value) && noexcept { +simdjson_warn_unused simdjson_inline error_code +simdjson_result::get(T &value) && noexcept { return std::forward>(*this).get(value); } @@ -3835,6 +4169,19 @@ struct padded_string final { **/ inline static simdjson_result load(std::string_view path) noexcept; + #if defined(_WIN32) && SIMDJSON_CPLUSPLUS17 + /** + * This function accepts a wide string path (UTF-16) and converts it to + * UTF-8 before loading the file. This allows windows users to work + * with unicode file paths without manually converting the paths everytime. + * + * @return IO_ERROR on error, including conversion failures. + * + * @param path the path to the file as a wide string. + **/ + inline static simdjson_result load(std::wstring_view path) noexcept; + #endif + private: padded_string &operator=(const padded_string &o) = delete; padded_string(const padded_string &o) = delete; @@ -3985,6 +4332,9 @@ class padded_string_view : public std::string_view { /** The number of allocated bytes. */ inline size_t capacity() const noexcept; + /** check that the view has sufficient padding */ + inline bool has_sufficient_padding() const noexcept; + /** * Remove the UTF-8 Byte Order Mark (BOM) if it exists. * @@ -4011,14 +4361,24 @@ inline std::ostream& operator<<(std::ostream& out, simdjson_result= SIMDJSON_PADDING) { + return true; + } + size_t missing_padding = SIMDJSON_PADDING - padding(); + if(length() < missing_padding) { return false; } + + for (size_t i = length() - missing_padding; i < length(); i++) { + char c = data()[i]; + if (c != ' ' && c != '\t' && c != '\n' && c != '\r') { + return false; + } + } + return true; +} + inline size_t padded_string_view::capacity() const noexcept { return _capacity; } inline size_t padded_string_view::padding() const noexcept { return capacity() - length(); } @@ -4083,10 +4459,33 @@ inline std::ostream& operator<<(std::ostream& out, simdjson_result 0; i--) { + char c = s[i - 1]; + if (c == ' ' || c == '\t' || c == '\n' || c == '\r') { + existing_padding++; + } else { + break; + } + } + size_t needed_padding = 0; + if (existing_padding < SIMDJSON_PADDING) { + needed_padding = SIMDJSON_PADDING - existing_padding; + s.append(needed_padding, ' '); + } + + return padded_string_view(s.data(), s.size() - needed_padding, s.size()); } + +inline padded_string_view pad_with_reserve(std::string& s) noexcept { + if (s.capacity() - s.size() < SIMDJSON_PADDING) { + s.reserve(s.size() + SIMDJSON_PADDING ); + } + return padded_string_view(s.data(), s.size(), s.capacity()); +} + + + } // namespace simdjson @@ -4094,6 +4493,7 @@ inline padded_string_view pad(std::string& s) noexcept { /* end file simdjson/padded_string_view-inl.h */ #include +#include namespace simdjson { namespace internal { @@ -4211,9 +4611,9 @@ inline const char *padded_string::data() const noexcept { return data_ptr; } inline char *padded_string::data() noexcept { return data_ptr; } -inline padded_string::operator std::string_view() const { return std::string_view(data(), length()); } +inline padded_string::operator std::string_view() const simdjson_lifetime_bound { return std::string_view(data(), length()); } -inline padded_string::operator padded_string_view() const noexcept { +inline padded_string::operator padded_string_view() const noexcept simdjson_lifetime_bound { return padded_string_view(data(), length(), length() + SIMDJSON_PADDING); } @@ -4271,6 +4671,62 @@ inline simdjson_result padded_string::load(std::string_view filen return s; } +#if defined(_WIN32) && SIMDJSON_CPLUSPLUS17 +inline simdjson_result padded_string::load(std::wstring_view filename) noexcept { + // Open the file using the wide characters + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + std::FILE *fp = _wfopen(filename.data(), L"rb"); + SIMDJSON_POP_DISABLE_WARNINGS + + if (fp == nullptr) { + return IO_ERROR; + } + + // Get the file size + int ret; +#if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS + ret = _fseeki64(fp, 0, SEEK_END); +#else + ret = std::fseek(fp, 0, SEEK_END); +#endif // _WIN64 + if(ret < 0) { + std::fclose(fp); + return IO_ERROR; + } +#if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS + __int64 llen = _ftelli64(fp); + if(llen == -1L) { + std::fclose(fp); + return IO_ERROR; + } +#else + long llen = std::ftell(fp); + if((llen < 0) || (llen == LONG_MAX)) { + std::fclose(fp); + return IO_ERROR; + } +#endif + + // Allocate the padded_string + size_t len = static_cast(llen); + padded_string s(len); + if (s.data() == nullptr) { + std::fclose(fp); + return MEMALLOC; + } + + // Read the padded_string + std::rewind(fp); + size_t bytes_read = std::fread(s.data(), 1, len, fp); + if (std::fclose(fp) != 0 || bytes_read != len) { + return IO_ERROR; + } + + return s; +} +#endif + } // namespace simdjson inline simdjson::padded_string operator ""_padded(const char *str, size_t len) { @@ -4278,7 +4734,7 @@ inline simdjson::padded_string operator ""_padded(const char *str, size_t len) { } #ifdef __cpp_char8_t inline simdjson::padded_string operator ""_padded(const char8_t *str, size_t len) { - return simdjson::padded_string(reinterpret_cast(str), len); + return simdjson::padded_string(reinterpret_cast(str), len); } #endif #endif // SIMDJSON_PADDED_STRING_INL_H @@ -4353,6 +4809,8 @@ class tape_ref; #ifndef SIMDJSON_DOM_ARRAY_H #define SIMDJSON_DOM_ARRAY_H +#include + /* skipped duplicate #include "simdjson/dom/base.h" */ /* including simdjson/internal/tape_ref.h: #include "simdjson/internal/tape_ref.h" */ /* begin file simdjson/internal/tape_ref.h */ @@ -4460,7 +4918,7 @@ class array { iterator& operator=(const iterator&) noexcept = default; private: simdjson_inline iterator(const internal::tape_ref &tape) noexcept; - internal::tape_ref tape; + internal::tape_ref tape{}; friend class array; }; @@ -4511,12 +4969,23 @@ class array { */ inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; + /** + * Recursive function which processes the JSON path of each child element + */ + inline void process_json_path_of_child_elements(std::vector::iterator& current, std::vector::iterator& end, const std::string_view& path_suffix, std::vector& accumulator) const noexcept; + + + /** + * Adds support for JSONPath expression with wildcards '*' + */ + inline simdjson_result> at_path_with_wildcard(std::string_view json_path) const noexcept; + /** * Get the value associated with the given JSONPath expression. We only support * JSONPath queries that trivially convertible to JSON Pointer queries: key * names and array indices. * - * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * https://www.rfc-editor.org/rfc/rfc9535 (RFC 9535) * * @return The value associated with the given JSONPath expression, or: * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails @@ -4544,6 +5013,15 @@ class array { */ inline simdjson_result at(size_t index) const noexcept; + /** + * Gets the values of items in an array element + * This function has linear-time complexity: the values are checked one by one. + * + * @return The child elements of an array + */ + + inline std::vector& get_values(std::vector& out) const noexcept; + /** * Implicitly convert object to element */ @@ -4551,7 +5029,7 @@ class array { private: simdjson_inline array(const internal::tape_ref &tape) noexcept; - internal::tape_ref tape; + internal::tape_ref tape{}; friend class element; friend struct simdjson_result; template @@ -4570,8 +5048,11 @@ struct simdjson_result : public internal::simdjson_result_base at_pointer(std::string_view json_pointer) const noexcept; + inline void process_json_path_of_child_elements(std::vector::iterator& current, std::vector::iterator& end, const std::string_view& path_suffix, std::vector& accumulator) const noexcept; + inline simdjson_result> at_path_with_wildcard(std::string_view json_path) const noexcept; inline simdjson_result at_path(std::string_view json_path) const noexcept; inline simdjson_result at(size_t index) const noexcept; + inline std::vector& get_values(std::vector& out) const noexcept; #if SIMDJSON_EXCEPTIONS inline dom::array::iterator begin() const noexcept(false); @@ -4584,9 +5065,7 @@ struct simdjson_result : public internal::simdjson_result_base - +#if SIMDJSON_SUPPORTS_RANGES namespace std { namespace ranges { template<> @@ -4597,7 +5076,7 @@ inline constexpr bool enable_view load(const std::string &path) & noexcept; - inline simdjson_result load(const std::string &path) && = delete ; + inline simdjson_result load(std::string_view path) & noexcept; + inline simdjson_result load(std::string_view path) && = delete ; /** * Load a JSON document from a file into a provide document instance and return a temporary reference to it. @@ -4852,8 +5331,8 @@ class parser { * - other json errors if parsing fails. You should not rely on these errors to always the same for the * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). */ - inline simdjson_result load_into_document(document& doc, const std::string &path) & noexcept; - inline simdjson_result load_into_document(document& doc, const std::string &path) && =delete; + inline simdjson_result load_into_document(document& doc, std::string_view path) & noexcept; + inline simdjson_result load_into_document(document& doc, std::string_view path) && =delete; /** * Parse a JSON document and return a temporary reference to it. @@ -4908,10 +5387,7 @@ class parser { * * ### std::string references * - * If you pass a mutable std::string reference (std::string&), the parser will seek to extend - * its capacity to SIMDJSON_PADDING bytes beyond the end of the string. - * - * Whenever you pass an std::string reference, the parser will access the bytes beyond the end of + * Whenever you pass an std::string reference, the parser may access the bytes beyond the end of * the string but before the end of the allocated memory (std::string::capacity()). * If you are using a sanitizer that checks for reading uninitialized bytes or std::string's * container-overflow checks, you may encounter sanitizer warnings. @@ -4943,7 +5419,7 @@ class parser { /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ simdjson_inline simdjson_result parse(const char *buf, size_t len, bool realloc_if_needed = true) & noexcept; simdjson_inline simdjson_result parse(const char *buf, size_t len, bool realloc_if_needed = true) && =delete; - /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ + /** @overload parse(const std::string &) */ simdjson_inline simdjson_result parse(const std::string &s) & noexcept; simdjson_inline simdjson_result parse(const std::string &s) && =delete; /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ @@ -5090,7 +5566,7 @@ class parser { * - other json errors if parsing fails. You should not rely on these errors to always the same for the * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). */ - inline simdjson_result load_many(const std::string &path, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; + inline simdjson_result load_many(std::string_view path, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; /** * Parse a buffer containing many JSON documents. @@ -5361,7 +5837,7 @@ class parser { inline error_code ensure_capacity(document& doc, size_t desired_capacity) noexcept; /** Read the file into loaded_bytes */ - inline simdjson_result read_file(const std::string &path) noexcept; + inline simdjson_result read_file(std::string_view path) noexcept; friend class parser::Iterator; friend class document_stream; @@ -5445,7 +5921,7 @@ class document_stream { /** * Construct an uninitialized document_stream. * - * ```c++ + * ```cpp * document_stream docs; * error = parser.parse_many(json).get(docs); * ``` @@ -5698,6 +6174,8 @@ struct simdjson_result : public internal::simdjson_result_ #ifndef SIMDJSON_DOM_ELEMENT_H #define SIMDJSON_DOM_ELEMENT_H +#include + /* skipped duplicate #include "simdjson/dom/base.h" */ /* skipped duplicate #include "simdjson/dom/array.h" */ @@ -6096,12 +6574,14 @@ class element { */ inline simdjson_result at_pointer(const std::string_view json_pointer) const noexcept; + inline simdjson_result> at_path_with_wildcard(const std::string_view json_path) const noexcept; + /** * Get the value associated with the given JSONPath expression. We only support * JSONPath queries that trivially convertible to JSON Pointer queries: key * names and array indices. * - * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * https://www.rfc-editor.org/rfc/rfc9535 (RFC 9535) * * @return The value associated with the given JSONPath expression, or: * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails @@ -6189,7 +6669,7 @@ class element { private: simdjson_inline element(const internal::tape_ref &tape) noexcept; - internal::tape_ref tape; + internal::tape_ref tape{}; friend class document; friend class object; friend class array; @@ -6241,6 +6721,7 @@ struct simdjson_result : public internal::simdjson_result_base operator[](const char *key) const noexcept; simdjson_result operator[](int) const noexcept = delete; simdjson_inline simdjson_result at_pointer(const std::string_view json_pointer) const noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(const std::string_view json_path) const noexcept; simdjson_inline simdjson_result at_path(const std::string_view json_path) const noexcept; [[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] simdjson_inline simdjson_result at(const std::string_view json_pointer) const noexcept; @@ -6272,6 +6753,8 @@ struct simdjson_result : public internal::simdjson_result_base + /* skipped duplicate #include "simdjson/dom/base.h" */ /* skipped duplicate #include "simdjson/dom/element.h" */ /* skipped duplicate #include "simdjson/internal/tape_ref.h" */ @@ -6360,7 +6843,7 @@ class object { private: simdjson_inline iterator(const internal::tape_ref &tape) noexcept; - internal::tape_ref tape; + internal::tape_ref tape{}; friend class object; }; @@ -6443,12 +6926,22 @@ class object { */ inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; + /** + * Recursive function which processes the JSON path of each child element + */ + inline void process_json_path_of_child_elements(std::vector::iterator& current, std::vector::iterator& end, const std::string_view& path_suffix, std::vector& accumulator) const noexcept; + + /** + * Adds support for JSONPath expression with wildcards '*' + */ + inline simdjson_result> at_path_with_wildcard(std::string_view json_path) const noexcept; + /** * Get the value associated with the given JSONPath expression. We only support * JSONPath queries that trivially convertible to JSON Pointer queries: key * names and array indices. * - * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * https://www.rfc-editor.org/rfc/rfc9535 (RFC 9535) * * @return The value associated with the given JSONPath expression, or: * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails @@ -6474,6 +6967,14 @@ class object { */ inline simdjson_result at_key(std::string_view key) const noexcept; + /** + * Gets the values associated with keys of an object + * This function has linear-time complexity: the keys are checked one by one. + * + * @return the values associated with each key of an object + */ + inline std::vector& get_values(std::vector& out) const noexcept; + /** * Get the value associated with the given key in a case-insensitive manner. * It is only guaranteed to work over ASCII inputs. @@ -6495,7 +6996,7 @@ class object { private: simdjson_inline object(const internal::tape_ref &tape) noexcept; - internal::tape_ref tape; + internal::tape_ref tape{}; friend class element; friend struct simdjson_result; @@ -6532,8 +7033,11 @@ struct simdjson_result : public internal::simdjson_result_base operator[](const char *key) const noexcept; simdjson_result operator[](int) const noexcept = delete; inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; + inline void process_json_path_of_child_elements(std::vector::iterator& current, std::vector::iterator& end, const std::string_view& path_suffix, std::vector& accumulator) const noexcept; + inline simdjson_result> at_path_with_wildcard(std::string_view json_path_new) const noexcept; inline simdjson_result at_path(std::string_view json_path) const noexcept; inline simdjson_result at_key(std::string_view key) const noexcept; + inline std::vector& get_values(std::vector& out) const noexcept; inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; #if SIMDJSON_EXCEPTIONS @@ -6545,9 +7049,7 @@ struct simdjson_result : public internal::simdjson_result_base - +#if SIMDJSON_SUPPORTS_RANGES namespace std { namespace ranges { template<> @@ -6558,7 +7060,7 @@ inline constexpr bool enable_view - namespace simdjson { /** @@ -6583,8 +7083,7 @@ namespace simdjson { */ namespace internal { -template -class base_formatter { +template class base_formatter { public: /** Add a comma **/ simdjson_inline void comma(); @@ -6623,24 +7122,76 @@ class base_formatter { /** Prints one character **/ simdjson_inline void one_char(char c); + /** Prints characters in [begin, end) verbatim. **/ + simdjson_inline void chars(const char *begin, const char *end); + simdjson_inline void call_print_newline() { - static_cast(this)->print_newline(); + static_cast(this)->print_newline(); } simdjson_inline void call_print_indents(size_t depth) { - static_cast(this)->print_indents(depth); + static_cast(this)->print_indents(depth); } simdjson_inline void call_print_space() { - static_cast(this)->print_space(); + static_cast(this)->print_space(); } protected: // implementation details (subject to change) /** Backing buffer **/ - std::vector buffer{}; // not ideal! -}; + struct vector_with_small_buffer { + vector_with_small_buffer() = default; + ~vector_with_small_buffer() { free_buffer(); } + vector_with_small_buffer(const vector_with_small_buffer &) = delete; + vector_with_small_buffer & + operator=(const vector_with_small_buffer &) = delete; + + void clear() { + size = 0; + capacity = StaticCapacity; + free_buffer(); + buffer = array; + } + + simdjson_inline void push_back(char c) { + if (capacity < size + 1) + grow(capacity * 2); + buffer[size++] = c; + } + + simdjson_inline void append(const char *begin, const char *end) { + const size_t new_size = size + (end - begin); + if (capacity < new_size) + // std::max(new_size, capacity * 2); is broken in tests on Windows + grow(new_size < capacity * 2 ? capacity * 2 : new_size); + std::copy(begin, end, buffer + size); + size = new_size; + } + + std::string_view str() const { return std::string_view(buffer, size); } + + private: + void free_buffer() { + if (buffer != array) + delete[] buffer; + } + void grow(size_t new_capacity) { + auto new_buffer = new char[new_capacity]; + std::copy(buffer, buffer + size, new_buffer); + free_buffer(); + buffer = new_buffer; + capacity = new_capacity; + } + + static const size_t StaticCapacity = 64; + char array[StaticCapacity]; + char *buffer = array; + size_t size = 0; + size_t capacity = StaticCapacity; + } buffer{}; +}; /** * @private This is the class that we expect to use with the string_builder @@ -6674,9 +7225,11 @@ class pretty_formatter : public base_formatter { * by a "formatter" which handles the details. Thus * the string_builder template could support both minification * and prettification, and various other tradeoffs. + * + * This is not to be confused with the simdjson::builder::string_builder + * which is a different class. */ -template -class string_builder { +template class string_builder { public: /** Construct an initially empty builder, would print the empty string **/ string_builder() = default; @@ -6698,11 +7251,12 @@ class string_builder { simdjson_inline std::string_view str() const; /** Append a key_value_pair to the builder (to be printed) **/ simdjson_inline void append(simdjson::dom::key_value_pair value); + private: formatter format{}; }; -} // internal +} // namespace internal namespace dom { @@ -6711,33 +7265,43 @@ namespace dom { * * @param out The output stream. * @param value The element. - * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + * @throw if there is an error with the underlying output stream. simdjson + * itself will not throw. */ -inline std::ostream& operator<<(std::ostream& out, simdjson::dom::element value); +inline std::ostream &operator<<(std::ostream &out, + simdjson::dom::element value); #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +inline std::ostream & +operator<<(std::ostream &out, + simdjson::simdjson_result x); #endif /** * Print JSON to an output stream. * * @param out The output stream. * @param value The array. - * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + * @throw if there is an error with the underlying output stream. simdjson + * itself will not throw. */ -inline std::ostream& operator<<(std::ostream& out, simdjson::dom::array value); +inline std::ostream &operator<<(std::ostream &out, simdjson::dom::array value); #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +inline std::ostream & +operator<<(std::ostream &out, + simdjson::simdjson_result x); #endif /** * Print JSON to an output stream. * * @param out The output stream. * @param value The object. - * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + * @throw if there is an error with the underlying output stream. simdjson + * itself will not throw. */ -inline std::ostream& operator<<(std::ostream& out, simdjson::dom::object value); +inline std::ostream &operator<<(std::ostream &out, simdjson::dom::object value); #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +inline std::ostream & +operator<<(std::ostream &out, + simdjson::simdjson_result x); #endif } // namespace dom @@ -6749,47 +7313,47 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result -std::string to_string(T x) { - // in C++, to_string is standard: http://www.cplusplus.com/reference/string/to_string/ - // Currently minify and to_string are identical but in the future, they may - // differ. - simdjson::internal::string_builder<> sb; - sb.append(x); - std::string_view answer = sb.str(); - return std::string(answer.data(), answer.size()); +template std::string to_string(T x) { + // in C++, to_string is standard: + // http://www.cplusplus.com/reference/string/to_string/ Currently minify and + // to_string are identical but in the future, they may differ. + simdjson::internal::string_builder<> sb; + sb.append(x); + std::string_view answer = sb.str(); + return std::string(answer.data(), answer.size()); } #if SIMDJSON_EXCEPTIONS -template -std::string to_string(simdjson_result x) { - if (x.error()) { throw simdjson_error(x.error()); } - return to_string(x.value()); +template std::string to_string(simdjson_result x) { + if (x.error()) { + throw simdjson_error(x.error()); + } + return to_string(x.value()); } #endif /** - * Minifies a JSON element or document, printing the smallest possible valid JSON. + * Minifies a JSON element or document, printing the smallest possible valid + * JSON. * * dom::parser parser; * element doc = parser.parse(" [ 1 , 2 , 3 ] "_padded); * cout << minify(doc) << endl; // prints [1,2,3] * */ -template -std::string minify(T x) { - return to_string(x); -} +template std::string minify(T x) { return to_string(x); } #if SIMDJSON_EXCEPTIONS -template -std::string minify(simdjson_result x) { - if (x.error()) { throw simdjson_error(x.error()); } - return to_string(x.value()); +template std::string minify(simdjson_result x) { + if (x.error()) { + throw simdjson_error(x.error()); + } + return to_string(x.value()); } #endif /** - * Prettifies a JSON element or document, printing the valid JSON with indentation. + * Prettifies a JSON element or document, printing the valid JSON with + * indentation. * * dom::parser parser; * element doc = parser.parse(" [ 1 , 2 , 3 ] "_padded); @@ -6805,25 +7369,24 @@ std::string minify(simdjson_result x) { * cout << prettify(doc) << endl; * */ -template -std::string prettify(T x) { - simdjson::internal::string_builder sb; - sb.append(x); - std::string_view answer = sb.str(); - return std::string(answer.data(), answer.size()); +template std::string prettify(T x) { + simdjson::internal::string_builder sb; + sb.append(x); + std::string_view answer = sb.str(); + return std::string(answer.data(), answer.size()); } #if SIMDJSON_EXCEPTIONS -template -std::string prettify(simdjson_result x) { - if (x.error()) { throw simdjson_error(x.error()); } - return to_string(x.value()); +template std::string prettify(simdjson_result x) { + if (x.error()) { + throw simdjson_error(x.error()); + } + return to_string(x.value()); } #endif } // namespace simdjson - #endif /* end file simdjson/dom/serialization.h */ @@ -6845,7 +7408,9 @@ std::string prettify(simdjson_result x) { #define SIMDJSON_JSONPATHUTIL_H #include -#include +/* skipped duplicate #include "simdjson/common_defs.h" */ + +#include namespace simdjson { /** @@ -6855,12 +7420,12 @@ namespace simdjson { */ inline std::string json_path_to_pointer_conversion(std::string_view json_path) { size_t i = 0; - // if JSONPath starts with $, skip it + // json_path.starts_with('$') requires C++20. if (!json_path.empty() && json_path.front() == '$') { i = 1; } - if (json_path.empty() || (json_path[i] != '.' && + if (i >= json_path.size() || (json_path[i] != '.' && json_path[i] != '[')) { return "-1"; // This is just a sentinel value, the caller should check for this and return an error. } @@ -6903,6 +7468,49 @@ inline std::string json_path_to_pointer_conversion(std::string_view json_path) { return result; } + +inline std::pair get_next_key_and_json_path(std::string_view& json_path) { + std::string_view key; + + if (json_path.empty()) { + return {key, json_path}; + } + size_t i = 0; + + // if JSONPath starts with $, skip it + if (json_path.front() == '$') { + i = 1; + } + + + if (i < json_path.length() && json_path[i] == '.') { + i += 1; + size_t key_start = i; + + while (i < json_path.length() && json_path[i] != '[' && json_path[i] != '.') { + ++i; + } + + key = json_path.substr(key_start, i - key_start); + } else if ((i+1 < json_path.size()) && json_path[i] == '[' && (json_path[i+1] == '\'' || json_path[i+1] == '"')) { + i += 2; + size_t key_start = i; + while (i < json_path.length() && json_path[i] != '\'' && json_path[i] != '"') { + ++i; + } + + key = json_path.substr(key_start, i - key_start); + + i += 2; + } else if ((i+2 < json_path.size()) && json_path[i] == '[' && json_path[i+1] == '*' && json_path[i+2] == ']') { // i.e [*].additional_keys or [*]["additional_keys"] + key = "*"; + i += 3; + } + + + return std::make_pair(key, json_path.substr(i)); +} + } // namespace simdjson #endif // SIMDJSON_JSONPATHUTIL_H /* end file simdjson/jsonpathutil.h */ @@ -7100,11 +7708,22 @@ inline simdjson_result simdjson_result::at_pointer(std return at_pointer(json_pointer); } +inline simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) const noexcept { + if (error()) { + return error(); + } + return first.at_path_with_wildcard(json_path); +} + inline simdjson_result simdjson_result::at(size_t index) const noexcept { if (error()) { return error(); } return first.at(index); } +inline std::vector& simdjson_result::get_values(std::vector& out) const noexcept { + return first.get_values(out); +} + namespace dom { // @@ -7175,6 +7794,93 @@ inline simdjson_result array::at_path(std::string_view json_path) const return at_pointer(json_pointer); } +inline void array::process_json_path_of_child_elements(std::vector::iterator& current, std::vector::iterator& end, const std::string_view& path_suffix, std::vector& accumulator) const noexcept { + if (current == end) { + return; + } + + simdjson_result> result; + + + for (auto it = current; it != end; ++it) { + std::vector child_result; + auto error = it->at_path_with_wildcard(path_suffix).get(child_result); + if(error) { + continue; + } + accumulator.reserve(accumulator.size() + child_result.size()); + accumulator.insert(accumulator.end(), + std::make_move_iterator(child_result.begin()), + std::make_move_iterator(child_result.end())); + } +} + +inline simdjson_result> array::at_path_with_wildcard(std::string_view json_path) const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + + size_t i = 0; + // json_path.starts_with('$') requires C++20. + if (!json_path.empty() && json_path.front() == '$') { + i = 1; + } + + if (i >= json_path.size() || (json_path[i] != '.' && json_path[i] != '[')) { + return INVALID_JSON_POINTER; + } + + if (json_path.find("*") != std::string::npos) { + std::vector child_values; + + if ( + (json_path.compare(i, 3, "[*]") == 0 && json_path.size() == i + 3) || + (json_path.compare(i, 2,".*") == 0 && json_path.size() == i + 2) + ) { + get_values(child_values); + return child_values; + } + + std::pair key_and_json_path = get_next_key_and_json_path(json_path); + + std::string_view key = key_and_json_path.first; + json_path = key_and_json_path.second; + + if (key.size() > 0) { + if (key == "*") { + get_values(child_values); + } else { + element pointer_result; + std::string json_pointer = std::string("/") + std::string(key); + auto error = at_pointer(json_pointer).get(pointer_result); + + if (!error) { + child_values.emplace_back(pointer_result); + } + } + + std::vector result = {}; + + if (child_values.size() > 0) { + std::vector::iterator child_values_begin = child_values.begin(); + std::vector::iterator child_values_end = child_values.end(); + + process_json_path_of_child_elements(child_values_begin, child_values_end, json_path, result); + } + + return result; + } else { + return INVALID_JSON_POINTER; + } + } else { + element result; + auto error = at_path(json_path).get(result); + if (error) { + return error; + } + + return std::vector{std::move(result)}; + } +} + inline simdjson_result array::at(size_t index) const noexcept { SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 size_t i=0; @@ -7185,6 +7891,15 @@ inline simdjson_result array::at(size_t index) const noexcept { return INDEX_OUT_OF_BOUNDS; } +inline std::vector& array::get_values(std::vector& out) const noexcept { + out.reserve(this->size()); + for (auto element : *this) { + out.emplace_back(element); + } + + return out; +} + inline array::operator element() const noexcept { return element(tape); } @@ -7284,10 +7999,19 @@ inline simdjson_result simdjson_result::at_path(std:: if (json_pointer == "-1") { return INVALID_JSON_POINTER; } return at_pointer(json_pointer); } +inline simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) const noexcept { + if (error()) { + return error(); + } + return first.at_path_with_wildcard(json_path); +} inline simdjson_result simdjson_result::at_key(std::string_view key) const noexcept { if (error()) { return error(); } return first.at_key(key); } +inline std::vector& simdjson_result::get_values(std::vector& out) const noexcept { + return first.get_values(out); +} inline simdjson_result simdjson_result::at_key_case_insensitive(std::string_view key) const noexcept { if (error()) { return error(); } return first.at_key_case_insensitive(key); @@ -7387,6 +8111,97 @@ inline simdjson_result object::at_path(std::string_view json_path) cons return at_pointer(json_pointer); } +inline void object::process_json_path_of_child_elements(std::vector::iterator& current, std::vector::iterator& end, const std::string_view& path_suffix, std::vector& accumulator) const noexcept { + if (current == end) { + return; + } + + simdjson_result> result; + + for (auto it = current; it != end; ++it) { + std::vector child_result; + auto error = it->at_path_with_wildcard(path_suffix).get(child_result); + if(error) { + continue; + } + accumulator.reserve(accumulator.size() + child_result.size()); + accumulator.insert(accumulator.end(), + std::make_move_iterator(child_result.begin()), + std::make_move_iterator(child_result.end())); + } +} + +inline simdjson_result> object::at_path_with_wildcard(std::string_view json_path) const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + + size_t i = 0; + if (json_path.empty()) { + return INVALID_JSON_POINTER; + } + // if JSONPath starts with $, skip it + // json_path.starts_with('$') requires C++20. + if (json_path.front() == '$') { + i = 1; + } + + if (i >= json_path.size() || (json_path[i] != '.' && json_path[i] != '[')) { + // expect JSONPath expressions to always start with $ but this isn't currently + // expected in jsonpathutil.h. + return INVALID_JSON_POINTER; + } + + if (json_path.find("*") != std::string::npos) { + + std::vector child_values; + + if ( + (json_path.compare(i, 3, "[*]") == 0 && json_path.size() == i + 3) || + (json_path.compare(i, 2,".*") == 0 && json_path.size() == i + 2) + ) { + get_values(child_values); + return child_values; + } + + std::pair key_and_json_path = get_next_key_and_json_path(json_path); + + std::string_view key = key_and_json_path.first; + json_path = key_and_json_path.second; + + if (key.size() > 0) { + if (key == "*") { + get_values(child_values); + } else { + element pointer_result; + auto error = at_pointer(std::string("/") + std::string(key)).get(pointer_result); + + if (!error) { + child_values.emplace_back(pointer_result); + } + } + + std::vector result = {}; + if (child_values.size() > 0) { + + std::vector::iterator child_values_begin = child_values.begin(); + std::vector::iterator child_values_end = child_values.end(); + + process_json_path_of_child_elements(child_values_begin, child_values_end, json_path, result); + } + + return result; + } else { + return INVALID_JSON_POINTER; + } + } else { + element result; + auto error = this->at_path(json_path).get(result); + if (error) { + return error; + } + return std::vector{std::move(result)}; + } +} + inline simdjson_result object::at_key(std::string_view key) const noexcept { iterator end_field = end(); for (iterator field = begin(); field != end_field; ++field) { @@ -7396,6 +8211,18 @@ inline simdjson_result object::at_key(std::string_view key) const noexc } return NO_SUCH_FIELD; } + +inline std::vector& object::get_values(std::vector& out) const noexcept { + iterator end_field = end(); + iterator begin_field = begin(); + + out.reserve(std::distance(begin_field, end_field)); + for (iterator field = begin_field; field != end_field; ++field) { + out.emplace_back(field.value()); + } + + return out; +} // In case you wonder why we need this, please see // https://github.com/simdjson/simdjson/issues/323 // People do seek keys in a case-insensitive manner. @@ -7507,14 +8334,14 @@ inline key_value_pair::key_value_pair(std::string_view _key, element _value) noe } // namespace simdjson -#if defined(__cpp_lib_ranges) +#if SIMDJSON_SUPPORTS_RANGES static_assert(std::ranges::view); static_assert(std::ranges::sized_range); #if SIMDJSON_EXCEPTIONS static_assert(std::ranges::view>); static_assert(std::ranges::sized_range>); #endif // SIMDJSON_EXCEPTIONS -#endif // defined(__cpp_lib_ranges) +#endif // SIMDJSON_SUPPORTS_RANGES #endif // SIMDJSON_OBJECT_INL_H /* end file simdjson/dom/object-inl.h */ @@ -7638,6 +8465,12 @@ simdjson_inline simdjson_result simdjson_result::at_ if (json_pointer == "-1") { return INVALID_JSON_POINTER; } return at_pointer(json_pointer); } + +simdjson_inline simdjson_result> simdjson_result::at_path_with_wildcard(const std::string_view json_path) const noexcept { + if (error()) { return error(); } + return first.at_path_with_wildcard(json_path); +} + #ifndef SIMDJSON_DISABLE_DEPRECATED_API [[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] simdjson_inline simdjson_result simdjson_result::at(const std::string_view json_pointer) const noexcept { @@ -7928,6 +8761,20 @@ inline simdjson_result element::at_pointer(std::string_view json_pointe } } } + +inline simdjson_result> element::at_path_with_wildcard(std::string_view json_path) const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + + switch (tape.tape_ref_type()) { + case internal::tape_type::START_OBJECT: + return object(tape).at_path_with_wildcard(json_path); + case internal::tape_type::START_ARRAY: + return array(tape).at_path_with_wildcard(json_path); + default: + return std::vector{}; + } +} + inline simdjson_result element::at_path(std::string_view json_path) const noexcept { auto json_pointer = json_path_to_pointer_conversion(json_path); if (json_pointer == "-1") { return INVALID_JSON_POINTER; } @@ -7994,14 +8841,14 @@ inline std::ostream& operator<<(std::ostream& out, element_type type) { #endif // SIMDJSON_ELEMENT_INL_H /* end file simdjson/dom/element-inl.h */ -#if defined(__cpp_lib_ranges) +#if SIMDJSON_SUPPORTS_RANGES static_assert(std::ranges::view); static_assert(std::ranges::sized_range); #if SIMDJSON_EXCEPTIONS static_assert(std::ranges::view>); static_assert(std::ranges::sized_range>); #endif // SIMDJSON_EXCEPTIONS -#endif // defined(__cpp_lib_ranges) +#endif // SIMDJSON_SUPPORTS_RANGES #endif // SIMDJSON_ARRAY_INL_H /* end file simdjson/dom/array-inl.h */ @@ -8052,11 +8899,11 @@ inline bool parser::dump_raw_tape(std::ostream &os) const noexcept { return valid ? doc.dump_raw_tape(os) : false; } -inline simdjson_result parser::read_file(const std::string &path) noexcept { +inline simdjson_result parser::read_file(std::string_view path) noexcept { // Open the file SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe - std::FILE *fp = std::fopen(path.c_str(), "rb"); + std::FILE *fp = std::fopen(path.data(), "rb"); SIMDJSON_POP_DISABLE_WARNINGS if (fp == nullptr) { @@ -8108,18 +8955,18 @@ inline simdjson_result parser::read_file(const std::string &path) noexce return bytes_read; } -inline simdjson_result parser::load(const std::string &path) & noexcept { +inline simdjson_result parser::load(std::string_view path) & noexcept { return load_into_document(doc, path); } -inline simdjson_result parser::load_into_document(document& provided_doc, const std::string &path) & noexcept { +inline simdjson_result parser::load_into_document(document& provided_doc, std::string_view path) & noexcept { size_t len; auto _error = read_file(path).get(len); if (_error) { return _error; } return parse_into_document(provided_doc, loaded_bytes.get(), len, false); } -inline simdjson_result parser::load_many(const std::string &path, size_t batch_size) noexcept { +inline simdjson_result parser::load_many(std::string_view path, size_t batch_size) noexcept { size_t len; auto _error = read_file(path).get(len); if (_error) { return _error; } @@ -8855,8 +9702,8 @@ inline bool document::dump_raw_tape(std::ostream &os) const noexcept { #define SIMDJSON_SERIALIZATION_INL_H /* skipped duplicate #include "simdjson/dom/base.h" */ -/* skipped duplicate #include "simdjson/dom/serialization.h" */ /* skipped duplicate #include "simdjson/dom/parser.h" */ +/* skipped duplicate #include "simdjson/dom/serialization.h" */ /* skipped duplicate #include "simdjson/internal/tape_type.h" */ /* skipped duplicate #include "simdjson/dom/array-inl.h" */ @@ -8868,7 +9715,9 @@ inline bool document::dump_raw_tape(std::ostream &os) const noexcept { namespace simdjson { namespace dom { inline bool parser::print_json(std::ostream &os) const noexcept { - if (!valid) { return false; } + if (!valid) { + return false; + } simdjson::internal::string_builder<> sb; sb.append(doc.root()); std::string_view answer = sb.str(); @@ -8876,37 +9725,51 @@ inline bool parser::print_json(std::ostream &os) const noexcept { return true; } -inline std::ostream& operator<<(std::ostream& out, simdjson::dom::element value) { - simdjson::internal::string_builder<> sb; - sb.append(value); - return (out << sb.str()); +inline std::ostream &operator<<(std::ostream &out, + simdjson::dom::element value) { + simdjson::internal::string_builder<> sb; + sb.append(value); + return (out << sb.str()); } #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); +inline std::ostream & +operator<<(std::ostream &out, + simdjson::simdjson_result x) { + if (x.error()) { + throw simdjson::simdjson_error(x.error()); + } + return (out << x.value()); } #endif -inline std::ostream& operator<<(std::ostream& out, simdjson::dom::array value) { - simdjson::internal::string_builder<> sb; - sb.append(value); - return (out << sb.str()); +inline std::ostream &operator<<(std::ostream &out, simdjson::dom::array value) { + simdjson::internal::string_builder<> sb; + sb.append(value); + return (out << sb.str()); } #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); +inline std::ostream & +operator<<(std::ostream &out, + simdjson::simdjson_result x) { + if (x.error()) { + throw simdjson::simdjson_error(x.error()); + } + return (out << x.value()); } #endif -inline std::ostream& operator<<(std::ostream& out, simdjson::dom::object value) { - simdjson::internal::string_builder<> sb; - sb.append(value); - return (out << sb.str()); +inline std::ostream &operator<<(std::ostream &out, + simdjson::dom::object value) { + simdjson::internal::string_builder<> sb; + sb.append(value); + return (out << sb.str()); } #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); +inline std::ostream & +operator<<(std::ostream &out, + simdjson::simdjson_result x) { + if (x.error()) { + throw simdjson::simdjson_error(x.error()); + } + return (out << x.value()); } #endif @@ -8921,8 +9784,9 @@ namespace { * We expect that most compilers will use 8 bytes for this data structure. **/ struct escape_sequence { - uint8_t length; - const char string[7]; // technically, we only ever need 6 characters, we pad to 8 + uint8_t length; + const char + string[7]; // technically, we only ever need 6 characters, we pad to 8 }; /**@private * This converts a signed integer into a character sequence. @@ -8938,7 +9802,7 @@ static char *fast_itoa(char *output, int64_t value) noexcept { char buffer[20]; uint64_t value_positive; // In general, negating a signed integer is unsafe. - if(value < 0) { + if (value < 0) { *output++ = '-'; // Doing value_positive = -value; while avoiding // undefined behavior warnings. @@ -8957,7 +9821,7 @@ static char *fast_itoa(char *output, int64_t value) noexcept { // A faster approach is possible if we expect large integers: // unroll the loop (work in 100s, 1000s) and use some kind of // memoization. - while(value_positive >= 10) { + while (value_positive >= 10) { *write_pointer-- = char('0' + (value_positive % 10)); value_positive /= 10; } @@ -8983,7 +9847,7 @@ static char *fast_itoa(char *output, uint64_t value) noexcept { // A faster approach is possible if we expect large integers: // unroll the loop (work in 100s, 1000s) and use some kind of // memoization. - while(value >= 10) { + while (value >= 10) { *write_pointer-- = char('0' + (value % 10)); value /= 10; }; @@ -8993,7 +9857,6 @@ static char *fast_itoa(char *output, uint64_t value) noexcept { return output + len; } - } // anonymous namespace namespace internal { @@ -9001,194 +9864,209 @@ namespace internal { * Minifier/formatter code. **/ -template +template simdjson_inline void base_formatter::number(uint64_t x) { char number_buffer[24]; char *newp = fast_itoa(number_buffer, x); - buffer.insert(buffer.end(), number_buffer, newp); + chars(number_buffer, newp); } -template +template simdjson_inline void base_formatter::number(int64_t x) { char number_buffer[24]; char *newp = fast_itoa(number_buffer, x); - buffer.insert(buffer.end(), number_buffer, newp); + chars(number_buffer, newp); } -template +template simdjson_inline void base_formatter::number(double x) { char number_buffer[24]; // Currently, passing the nullptr to the second argument is // safe because our implementation does not check the second // argument. char *newp = internal::to_chars(number_buffer, nullptr, x); - buffer.insert(buffer.end(), number_buffer, newp); + chars(number_buffer, newp); } -template -simdjson_inline void base_formatter::start_array() { one_char('['); } - +template +simdjson_inline void base_formatter::start_array() { + one_char('['); +} -template -simdjson_inline void base_formatter::end_array() { one_char(']'); } +template +simdjson_inline void base_formatter::end_array() { + one_char(']'); +} -template -simdjson_inline void base_formatter::start_object() { one_char('{'); } +template +simdjson_inline void base_formatter::start_object() { + one_char('{'); +} -template -simdjson_inline void base_formatter::end_object() { one_char('}'); } +template +simdjson_inline void base_formatter::end_object() { + one_char('}'); +} -template -simdjson_inline void base_formatter::comma() { one_char(','); } +template +simdjson_inline void base_formatter::comma() { + one_char(','); +} -template +template simdjson_inline void base_formatter::true_atom() { - const char * s = "true"; - buffer.insert(buffer.end(), s, s + 4); + const char *s = "true"; + chars(s, s + 4); } -template +template simdjson_inline void base_formatter::false_atom() { - const char * s = "false"; - buffer.insert(buffer.end(), s, s + 5); + const char *s = "false"; + chars(s, s + 5); } -template +template simdjson_inline void base_formatter::null_atom() { - const char * s = "null"; - buffer.insert(buffer.end(), s, s + 4); + const char *s = "null"; + chars(s, s + 4); +} + +template +simdjson_inline void base_formatter::one_char(char c) { + buffer.push_back(c); } -template -simdjson_inline void base_formatter::one_char(char c) { buffer.push_back(c); } +template +simdjson_inline void base_formatter::chars(const char *begin, + const char *end) { + buffer.append(begin, end); +} -template -simdjson_inline void base_formatter::key(std::string_view unescaped) { +template +simdjson_inline void +base_formatter::key(std::string_view unescaped) { string(unescaped); one_char(':'); } -template -simdjson_inline void base_formatter::string(std::string_view unescaped) { +template +simdjson_inline void +base_formatter::string(std::string_view unescaped) { one_char('\"'); size_t i = 0; - // Fast path for the case where we have no control character, no ", and no backslash. - // This should include most keys. - // - // We would like to use 'bool' but some compilers take offense to bitwise operation - // with bool types. - constexpr static char needs_escaping[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - for(;i + 8 <= unescaped.length(); i += 8) { + // Fast path for the case where we have no control character, no ", and no + // backslash. This should include most keys. + // + // We would like to use 'bool' but some compilers take offense to bitwise + // operation with bool types. + constexpr static char needs_escaping[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + for (; i + 8 <= unescaped.length(); i += 8) { // Poor's man vectorization. This could get much faster if we used SIMD. // - // It is not the case that replacing '|' with '||' would be neutral performance-wise. - if(needs_escaping[uint8_t(unescaped[i])] | needs_escaping[uint8_t(unescaped[i+1])] - | needs_escaping[uint8_t(unescaped[i+2])] | needs_escaping[uint8_t(unescaped[i+3])] - | needs_escaping[uint8_t(unescaped[i+4])] | needs_escaping[uint8_t(unescaped[i+5])] - | needs_escaping[uint8_t(unescaped[i+6])] | needs_escaping[uint8_t(unescaped[i+7])] - ) { break; } - } - for(;i < unescaped.length(); i++) { - if(needs_escaping[uint8_t(unescaped[i])]) { break; } - } - // The following is also possible and omits a 256-byte table, but it is slower: - // for (; (i < unescaped.length()) && (uint8_t(unescaped[i]) > 0x1F) + // It is not the case that replacing '|' with '||' would be neutral + // performance-wise. + if (needs_escaping[uint8_t(unescaped[i])] | + needs_escaping[uint8_t(unescaped[i + 1])] | + needs_escaping[uint8_t(unescaped[i + 2])] | + needs_escaping[uint8_t(unescaped[i + 3])] | + needs_escaping[uint8_t(unescaped[i + 4])] | + needs_escaping[uint8_t(unescaped[i + 5])] | + needs_escaping[uint8_t(unescaped[i + 6])] | + needs_escaping[uint8_t(unescaped[i + 7])]) { + break; + } + } + for (; i < unescaped.length(); i++) { + if (needs_escaping[uint8_t(unescaped[i])]) { + break; + } + } + // The following is also possible and omits a 256-byte table, but it is + // slower: for (; (i < unescaped.length()) && (uint8_t(unescaped[i]) > 0x1F) // && (unescaped[i] != '\"') && (unescaped[i] != '\\'); i++) {} // At least for long strings, the following should be fast. We could // do better by integrating the checks and the insertion. - buffer.insert(buffer.end(), unescaped.data(), unescaped.data() + i); + chars(unescaped.data(), unescaped.data() + i); // We caught a control character if we enter this loop (slow). // Note that we are do not restart from the beginning, but rather we continue // from the point where we encountered something that requires escaping. for (; i < unescaped.length(); i++) { switch (unescaped[i]) { - case '\"': - { - const char * s = "\\\""; - buffer.insert(buffer.end(), s, s + 2); - } - break; - case '\\': - { - const char * s = "\\\\"; - buffer.insert(buffer.end(), s, s + 2); - } - break; + case '\"': { + const char *s = "\\\""; + chars(s, s + 2); + } break; + case '\\': { + const char *s = "\\\\"; + chars(s, s + 2); + } break; default: if (uint8_t(unescaped[i]) <= 0x1F) { // If packed, this uses 8 * 32 bytes. // Note that we expect most compilers to embed this code in the data // section. constexpr static escape_sequence escaped[32] = { - {6, "\\u0000"}, {6, "\\u0001"}, {6, "\\u0002"}, {6, "\\u0003"}, - {6, "\\u0004"}, {6, "\\u0005"}, {6, "\\u0006"}, {6, "\\u0007"}, - {2, "\\b"}, {2, "\\t"}, {2, "\\n"}, {6, "\\u000b"}, - {2, "\\f"}, {2, "\\r"}, {6, "\\u000e"}, {6, "\\u000f"}, - {6, "\\u0010"}, {6, "\\u0011"}, {6, "\\u0012"}, {6, "\\u0013"}, - {6, "\\u0014"}, {6, "\\u0015"}, {6, "\\u0016"}, {6, "\\u0017"}, - {6, "\\u0018"}, {6, "\\u0019"}, {6, "\\u001a"}, {6, "\\u001b"}, - {6, "\\u001c"}, {6, "\\u001d"}, {6, "\\u001e"}, {6, "\\u001f"}}; + {6, "\\u0000"}, {6, "\\u0001"}, {6, "\\u0002"}, {6, "\\u0003"}, + {6, "\\u0004"}, {6, "\\u0005"}, {6, "\\u0006"}, {6, "\\u0007"}, + {2, "\\b"}, {2, "\\t"}, {2, "\\n"}, {6, "\\u000b"}, + {2, "\\f"}, {2, "\\r"}, {6, "\\u000e"}, {6, "\\u000f"}, + {6, "\\u0010"}, {6, "\\u0011"}, {6, "\\u0012"}, {6, "\\u0013"}, + {6, "\\u0014"}, {6, "\\u0015"}, {6, "\\u0016"}, {6, "\\u0017"}, + {6, "\\u0018"}, {6, "\\u0019"}, {6, "\\u001a"}, {6, "\\u001b"}, + {6, "\\u001c"}, {6, "\\u001d"}, {6, "\\u001e"}, {6, "\\u001f"}}; auto u = escaped[uint8_t(unescaped[i])]; - buffer.insert(buffer.end(), u.string, u.string + u.length); + chars(u.string, u.string + u.length); } else { one_char(unescaped[i]); } } // switch - } // for + } // for one_char('\"'); } - -template -inline void base_formatter::clear() { +template inline void base_formatter::clear() { buffer.clear(); } -template +template simdjson_inline std::string_view base_formatter::str() const { - return std::string_view(buffer.data(), buffer.size()); + return buffer.str(); } -simdjson_inline void mini_formatter::print_newline() { - return; -} +simdjson_inline void mini_formatter::print_newline() { return; } simdjson_inline void mini_formatter::print_indents(size_t depth) { - (void)depth; - return; + (void)depth; + return; } -simdjson_inline void mini_formatter::print_space() { - return; -} +simdjson_inline void mini_formatter::print_space() { return; } -simdjson_inline void pretty_formatter::print_newline() { - one_char('\n'); -} +simdjson_inline void pretty_formatter::print_newline() { one_char('\n'); } simdjson_inline void pretty_formatter::print_indents(size_t depth) { - if(this->indent_step <= 0) { - return; - } - for(size_t i = 0; i < this->indent_step * depth; i++) { - one_char(' '); - } -} - -simdjson_inline void pretty_formatter::print_space() { + if (this->indent_step <= 0) { + return; + } + for (size_t i = 0; i < this->indent_step * depth; i++) { one_char(' '); + } } +simdjson_inline void pretty_formatter::print_space() { one_char(' '); } + /*** * String building code. **/ @@ -9366,7 +10244,8 @@ inline void string_builder::append(simdjson::dom::array value) { } template -simdjson_inline void string_builder::append(simdjson::dom::key_value_pair kv) { +simdjson_inline void +string_builder::append(simdjson::dom::key_value_pair kv) { format.key(kv.key); append(kv.value); } @@ -9381,7 +10260,6 @@ simdjson_inline std::string_view string_builder::str() const { return format.str(); } - } // namespace internal } // namespace simdjson @@ -9807,7 +10685,665 @@ extern SIMDJSON_DLLIMPORTEXPORT const double power_of_ten[]; // The truncated powers of five from 5^-342 all the way to 5^308 // The mantissa is truncated to 128 bits, and // never rounded up. Uses about 10KB. -extern SIMDJSON_DLLIMPORTEXPORT const uint64_t power_of_five_128[]; +// We use the template trick to allow inclusion in multiple translation units. +#if SIMDJSON_STATIC_REFLECTION +template struct powers_template { +constexpr static uint64_t power_of_five_128[651*2]= { + 0xeef453d6923bd65a,0x113faa2906a13b3f, + 0x9558b4661b6565f8,0x4ac7ca59a424c507, + 0xbaaee17fa23ebf76,0x5d79bcf00d2df649, + 0xe95a99df8ace6f53,0xf4d82c2c107973dc, + 0x91d8a02bb6c10594,0x79071b9b8a4be869, + 0xb64ec836a47146f9,0x9748e2826cdee284, + 0xe3e27a444d8d98b7,0xfd1b1b2308169b25, + 0x8e6d8c6ab0787f72,0xfe30f0f5e50e20f7, + 0xb208ef855c969f4f,0xbdbd2d335e51a935, + 0xde8b2b66b3bc4723,0xad2c788035e61382, + 0x8b16fb203055ac76,0x4c3bcb5021afcc31, + 0xaddcb9e83c6b1793,0xdf4abe242a1bbf3d, + 0xd953e8624b85dd78,0xd71d6dad34a2af0d, + 0x87d4713d6f33aa6b,0x8672648c40e5ad68, + 0xa9c98d8ccb009506,0x680efdaf511f18c2, + 0xd43bf0effdc0ba48,0x212bd1b2566def2, + 0x84a57695fe98746d,0x14bb630f7604b57, + 0xa5ced43b7e3e9188,0x419ea3bd35385e2d, + 0xcf42894a5dce35ea,0x52064cac828675b9, + 0x818995ce7aa0e1b2,0x7343efebd1940993, + 0xa1ebfb4219491a1f,0x1014ebe6c5f90bf8, + 0xca66fa129f9b60a6,0xd41a26e077774ef6, + 0xfd00b897478238d0,0x8920b098955522b4, + 0x9e20735e8cb16382,0x55b46e5f5d5535b0, + 0xc5a890362fddbc62,0xeb2189f734aa831d, + 0xf712b443bbd52b7b,0xa5e9ec7501d523e4, + 0x9a6bb0aa55653b2d,0x47b233c92125366e, + 0xc1069cd4eabe89f8,0x999ec0bb696e840a, + 0xf148440a256e2c76,0xc00670ea43ca250d, + 0x96cd2a865764dbca,0x380406926a5e5728, + 0xbc807527ed3e12bc,0xc605083704f5ecf2, + 0xeba09271e88d976b,0xf7864a44c633682e, + 0x93445b8731587ea3,0x7ab3ee6afbe0211d, + 0xb8157268fdae9e4c,0x5960ea05bad82964, + 0xe61acf033d1a45df,0x6fb92487298e33bd, + 0x8fd0c16206306bab,0xa5d3b6d479f8e056, + 0xb3c4f1ba87bc8696,0x8f48a4899877186c, + 0xe0b62e2929aba83c,0x331acdabfe94de87, + 0x8c71dcd9ba0b4925,0x9ff0c08b7f1d0b14, + 0xaf8e5410288e1b6f,0x7ecf0ae5ee44dd9, + 0xdb71e91432b1a24a,0xc9e82cd9f69d6150, + 0x892731ac9faf056e,0xbe311c083a225cd2, + 0xab70fe17c79ac6ca,0x6dbd630a48aaf406, + 0xd64d3d9db981787d,0x92cbbccdad5b108, + 0x85f0468293f0eb4e,0x25bbf56008c58ea5, + 0xa76c582338ed2621,0xaf2af2b80af6f24e, + 0xd1476e2c07286faa,0x1af5af660db4aee1, + 0x82cca4db847945ca,0x50d98d9fc890ed4d, + 0xa37fce126597973c,0xe50ff107bab528a0, + 0xcc5fc196fefd7d0c,0x1e53ed49a96272c8, + 0xff77b1fcbebcdc4f,0x25e8e89c13bb0f7a, + 0x9faacf3df73609b1,0x77b191618c54e9ac, + 0xc795830d75038c1d,0xd59df5b9ef6a2417, + 0xf97ae3d0d2446f25,0x4b0573286b44ad1d, + 0x9becce62836ac577,0x4ee367f9430aec32, + 0xc2e801fb244576d5,0x229c41f793cda73f, + 0xf3a20279ed56d48a,0x6b43527578c1110f, + 0x9845418c345644d6,0x830a13896b78aaa9, + 0xbe5691ef416bd60c,0x23cc986bc656d553, + 0xedec366b11c6cb8f,0x2cbfbe86b7ec8aa8, + 0x94b3a202eb1c3f39,0x7bf7d71432f3d6a9, + 0xb9e08a83a5e34f07,0xdaf5ccd93fb0cc53, + 0xe858ad248f5c22c9,0xd1b3400f8f9cff68, + 0x91376c36d99995be,0x23100809b9c21fa1, + 0xb58547448ffffb2d,0xabd40a0c2832a78a, + 0xe2e69915b3fff9f9,0x16c90c8f323f516c, + 0x8dd01fad907ffc3b,0xae3da7d97f6792e3, + 0xb1442798f49ffb4a,0x99cd11cfdf41779c, + 0xdd95317f31c7fa1d,0x40405643d711d583, + 0x8a7d3eef7f1cfc52,0x482835ea666b2572, + 0xad1c8eab5ee43b66,0xda3243650005eecf, + 0xd863b256369d4a40,0x90bed43e40076a82, + 0x873e4f75e2224e68,0x5a7744a6e804a291, + 0xa90de3535aaae202,0x711515d0a205cb36, + 0xd3515c2831559a83,0xd5a5b44ca873e03, + 0x8412d9991ed58091,0xe858790afe9486c2, + 0xa5178fff668ae0b6,0x626e974dbe39a872, + 0xce5d73ff402d98e3,0xfb0a3d212dc8128f, + 0x80fa687f881c7f8e,0x7ce66634bc9d0b99, + 0xa139029f6a239f72,0x1c1fffc1ebc44e80, + 0xc987434744ac874e,0xa327ffb266b56220, + 0xfbe9141915d7a922,0x4bf1ff9f0062baa8, + 0x9d71ac8fada6c9b5,0x6f773fc3603db4a9, + 0xc4ce17b399107c22,0xcb550fb4384d21d3, + 0xf6019da07f549b2b,0x7e2a53a146606a48, + 0x99c102844f94e0fb,0x2eda7444cbfc426d, + 0xc0314325637a1939,0xfa911155fefb5308, + 0xf03d93eebc589f88,0x793555ab7eba27ca, + 0x96267c7535b763b5,0x4bc1558b2f3458de, + 0xbbb01b9283253ca2,0x9eb1aaedfb016f16, + 0xea9c227723ee8bcb,0x465e15a979c1cadc, + 0x92a1958a7675175f,0xbfacd89ec191ec9, + 0xb749faed14125d36,0xcef980ec671f667b, + 0xe51c79a85916f484,0x82b7e12780e7401a, + 0x8f31cc0937ae58d2,0xd1b2ecb8b0908810, + 0xb2fe3f0b8599ef07,0x861fa7e6dcb4aa15, + 0xdfbdcece67006ac9,0x67a791e093e1d49a, + 0x8bd6a141006042bd,0xe0c8bb2c5c6d24e0, + 0xaecc49914078536d,0x58fae9f773886e18, + 0xda7f5bf590966848,0xaf39a475506a899e, + 0x888f99797a5e012d,0x6d8406c952429603, + 0xaab37fd7d8f58178,0xc8e5087ba6d33b83, + 0xd5605fcdcf32e1d6,0xfb1e4a9a90880a64, + 0x855c3be0a17fcd26,0x5cf2eea09a55067f, + 0xa6b34ad8c9dfc06f,0xf42faa48c0ea481e, + 0xd0601d8efc57b08b,0xf13b94daf124da26, + 0x823c12795db6ce57,0x76c53d08d6b70858, + 0xa2cb1717b52481ed,0x54768c4b0c64ca6e, + 0xcb7ddcdda26da268,0xa9942f5dcf7dfd09, + 0xfe5d54150b090b02,0xd3f93b35435d7c4c, + 0x9efa548d26e5a6e1,0xc47bc5014a1a6daf, + 0xc6b8e9b0709f109a,0x359ab6419ca1091b, + 0xf867241c8cc6d4c0,0xc30163d203c94b62, + 0x9b407691d7fc44f8,0x79e0de63425dcf1d, + 0xc21094364dfb5636,0x985915fc12f542e4, + 0xf294b943e17a2bc4,0x3e6f5b7b17b2939d, + 0x979cf3ca6cec5b5a,0xa705992ceecf9c42, + 0xbd8430bd08277231,0x50c6ff782a838353, + 0xece53cec4a314ebd,0xa4f8bf5635246428, + 0x940f4613ae5ed136,0x871b7795e136be99, + 0xb913179899f68584,0x28e2557b59846e3f, + 0xe757dd7ec07426e5,0x331aeada2fe589cf, + 0x9096ea6f3848984f,0x3ff0d2c85def7621, + 0xb4bca50b065abe63,0xfed077a756b53a9, + 0xe1ebce4dc7f16dfb,0xd3e8495912c62894, + 0x8d3360f09cf6e4bd,0x64712dd7abbbd95c, + 0xb080392cc4349dec,0xbd8d794d96aacfb3, + 0xdca04777f541c567,0xecf0d7a0fc5583a0, + 0x89e42caaf9491b60,0xf41686c49db57244, + 0xac5d37d5b79b6239,0x311c2875c522ced5, + 0xd77485cb25823ac7,0x7d633293366b828b, + 0x86a8d39ef77164bc,0xae5dff9c02033197, + 0xa8530886b54dbdeb,0xd9f57f830283fdfc, + 0xd267caa862a12d66,0xd072df63c324fd7b, + 0x8380dea93da4bc60,0x4247cb9e59f71e6d, + 0xa46116538d0deb78,0x52d9be85f074e608, + 0xcd795be870516656,0x67902e276c921f8b, + 0x806bd9714632dff6,0xba1cd8a3db53b6, + 0xa086cfcd97bf97f3,0x80e8a40eccd228a4, + 0xc8a883c0fdaf7df0,0x6122cd128006b2cd, + 0xfad2a4b13d1b5d6c,0x796b805720085f81, + 0x9cc3a6eec6311a63,0xcbe3303674053bb0, + 0xc3f490aa77bd60fc,0xbedbfc4411068a9c, + 0xf4f1b4d515acb93b,0xee92fb5515482d44, + 0x991711052d8bf3c5,0x751bdd152d4d1c4a, + 0xbf5cd54678eef0b6,0xd262d45a78a0635d, + 0xef340a98172aace4,0x86fb897116c87c34, + 0x9580869f0e7aac0e,0xd45d35e6ae3d4da0, + 0xbae0a846d2195712,0x8974836059cca109, + 0xe998d258869facd7,0x2bd1a438703fc94b, + 0x91ff83775423cc06,0x7b6306a34627ddcf, + 0xb67f6455292cbf08,0x1a3bc84c17b1d542, + 0xe41f3d6a7377eeca,0x20caba5f1d9e4a93, + 0x8e938662882af53e,0x547eb47b7282ee9c, + 0xb23867fb2a35b28d,0xe99e619a4f23aa43, + 0xdec681f9f4c31f31,0x6405fa00e2ec94d4, + 0x8b3c113c38f9f37e,0xde83bc408dd3dd04, + 0xae0b158b4738705e,0x9624ab50b148d445, + 0xd98ddaee19068c76,0x3badd624dd9b0957, + 0x87f8a8d4cfa417c9,0xe54ca5d70a80e5d6, + 0xa9f6d30a038d1dbc,0x5e9fcf4ccd211f4c, + 0xd47487cc8470652b,0x7647c3200069671f, + 0x84c8d4dfd2c63f3b,0x29ecd9f40041e073, + 0xa5fb0a17c777cf09,0xf468107100525890, + 0xcf79cc9db955c2cc,0x7182148d4066eeb4, + 0x81ac1fe293d599bf,0xc6f14cd848405530, + 0xa21727db38cb002f,0xb8ada00e5a506a7c, + 0xca9cf1d206fdc03b,0xa6d90811f0e4851c, + 0xfd442e4688bd304a,0x908f4a166d1da663, + 0x9e4a9cec15763e2e,0x9a598e4e043287fe, + 0xc5dd44271ad3cdba,0x40eff1e1853f29fd, + 0xf7549530e188c128,0xd12bee59e68ef47c, + 0x9a94dd3e8cf578b9,0x82bb74f8301958ce, + 0xc13a148e3032d6e7,0xe36a52363c1faf01, + 0xf18899b1bc3f8ca1,0xdc44e6c3cb279ac1, + 0x96f5600f15a7b7e5,0x29ab103a5ef8c0b9, + 0xbcb2b812db11a5de,0x7415d448f6b6f0e7, + 0xebdf661791d60f56,0x111b495b3464ad21, + 0x936b9fcebb25c995,0xcab10dd900beec34, + 0xb84687c269ef3bfb,0x3d5d514f40eea742, + 0xe65829b3046b0afa,0xcb4a5a3112a5112, + 0x8ff71a0fe2c2e6dc,0x47f0e785eaba72ab, + 0xb3f4e093db73a093,0x59ed216765690f56, + 0xe0f218b8d25088b8,0x306869c13ec3532c, + 0x8c974f7383725573,0x1e414218c73a13fb, + 0xafbd2350644eeacf,0xe5d1929ef90898fa, + 0xdbac6c247d62a583,0xdf45f746b74abf39, + 0x894bc396ce5da772,0x6b8bba8c328eb783, + 0xab9eb47c81f5114f,0x66ea92f3f326564, + 0xd686619ba27255a2,0xc80a537b0efefebd, + 0x8613fd0145877585,0xbd06742ce95f5f36, + 0xa798fc4196e952e7,0x2c48113823b73704, + 0xd17f3b51fca3a7a0,0xf75a15862ca504c5, + 0x82ef85133de648c4,0x9a984d73dbe722fb, + 0xa3ab66580d5fdaf5,0xc13e60d0d2e0ebba, + 0xcc963fee10b7d1b3,0x318df905079926a8, + 0xffbbcfe994e5c61f,0xfdf17746497f7052, + 0x9fd561f1fd0f9bd3,0xfeb6ea8bedefa633, + 0xc7caba6e7c5382c8,0xfe64a52ee96b8fc0, + 0xf9bd690a1b68637b,0x3dfdce7aa3c673b0, + 0x9c1661a651213e2d,0x6bea10ca65c084e, + 0xc31bfa0fe5698db8,0x486e494fcff30a62, + 0xf3e2f893dec3f126,0x5a89dba3c3efccfa, + 0x986ddb5c6b3a76b7,0xf89629465a75e01c, + 0xbe89523386091465,0xf6bbb397f1135823, + 0xee2ba6c0678b597f,0x746aa07ded582e2c, + 0x94db483840b717ef,0xa8c2a44eb4571cdc, + 0xba121a4650e4ddeb,0x92f34d62616ce413, + 0xe896a0d7e51e1566,0x77b020baf9c81d17, + 0x915e2486ef32cd60,0xace1474dc1d122e, + 0xb5b5ada8aaff80b8,0xd819992132456ba, + 0xe3231912d5bf60e6,0x10e1fff697ed6c69, + 0x8df5efabc5979c8f,0xca8d3ffa1ef463c1, + 0xb1736b96b6fd83b3,0xbd308ff8a6b17cb2, + 0xddd0467c64bce4a0,0xac7cb3f6d05ddbde, + 0x8aa22c0dbef60ee4,0x6bcdf07a423aa96b, + 0xad4ab7112eb3929d,0x86c16c98d2c953c6, + 0xd89d64d57a607744,0xe871c7bf077ba8b7, + 0x87625f056c7c4a8b,0x11471cd764ad4972, + 0xa93af6c6c79b5d2d,0xd598e40d3dd89bcf, + 0xd389b47879823479,0x4aff1d108d4ec2c3, + 0x843610cb4bf160cb,0xcedf722a585139ba, + 0xa54394fe1eedb8fe,0xc2974eb4ee658828, + 0xce947a3da6a9273e,0x733d226229feea32, + 0x811ccc668829b887,0x806357d5a3f525f, + 0xa163ff802a3426a8,0xca07c2dcb0cf26f7, + 0xc9bcff6034c13052,0xfc89b393dd02f0b5, + 0xfc2c3f3841f17c67,0xbbac2078d443ace2, + 0x9d9ba7832936edc0,0xd54b944b84aa4c0d, + 0xc5029163f384a931,0xa9e795e65d4df11, + 0xf64335bcf065d37d,0x4d4617b5ff4a16d5, + 0x99ea0196163fa42e,0x504bced1bf8e4e45, + 0xc06481fb9bcf8d39,0xe45ec2862f71e1d6, + 0xf07da27a82c37088,0x5d767327bb4e5a4c, + 0x964e858c91ba2655,0x3a6a07f8d510f86f, + 0xbbe226efb628afea,0x890489f70a55368b, + 0xeadab0aba3b2dbe5,0x2b45ac74ccea842e, + 0x92c8ae6b464fc96f,0x3b0b8bc90012929d, + 0xb77ada0617e3bbcb,0x9ce6ebb40173744, + 0xe55990879ddcaabd,0xcc420a6a101d0515, + 0x8f57fa54c2a9eab6,0x9fa946824a12232d, + 0xb32df8e9f3546564,0x47939822dc96abf9, + 0xdff9772470297ebd,0x59787e2b93bc56f7, + 0x8bfbea76c619ef36,0x57eb4edb3c55b65a, + 0xaefae51477a06b03,0xede622920b6b23f1, + 0xdab99e59958885c4,0xe95fab368e45eced, + 0x88b402f7fd75539b,0x11dbcb0218ebb414, + 0xaae103b5fcd2a881,0xd652bdc29f26a119, + 0xd59944a37c0752a2,0x4be76d3346f0495f, + 0x857fcae62d8493a5,0x6f70a4400c562ddb, + 0xa6dfbd9fb8e5b88e,0xcb4ccd500f6bb952, + 0xd097ad07a71f26b2,0x7e2000a41346a7a7, + 0x825ecc24c873782f,0x8ed400668c0c28c8, + 0xa2f67f2dfa90563b,0x728900802f0f32fa, + 0xcbb41ef979346bca,0x4f2b40a03ad2ffb9, + 0xfea126b7d78186bc,0xe2f610c84987bfa8, + 0x9f24b832e6b0f436,0xdd9ca7d2df4d7c9, + 0xc6ede63fa05d3143,0x91503d1c79720dbb, + 0xf8a95fcf88747d94,0x75a44c6397ce912a, + 0x9b69dbe1b548ce7c,0xc986afbe3ee11aba, + 0xc24452da229b021b,0xfbe85badce996168, + 0xf2d56790ab41c2a2,0xfae27299423fb9c3, + 0x97c560ba6b0919a5,0xdccd879fc967d41a, + 0xbdb6b8e905cb600f,0x5400e987bbc1c920, + 0xed246723473e3813,0x290123e9aab23b68, + 0x9436c0760c86e30b,0xf9a0b6720aaf6521, + 0xb94470938fa89bce,0xf808e40e8d5b3e69, + 0xe7958cb87392c2c2,0xb60b1d1230b20e04, + 0x90bd77f3483bb9b9,0xb1c6f22b5e6f48c2, + 0xb4ecd5f01a4aa828,0x1e38aeb6360b1af3, + 0xe2280b6c20dd5232,0x25c6da63c38de1b0, + 0x8d590723948a535f,0x579c487e5a38ad0e, + 0xb0af48ec79ace837,0x2d835a9df0c6d851, + 0xdcdb1b2798182244,0xf8e431456cf88e65, + 0x8a08f0f8bf0f156b,0x1b8e9ecb641b58ff, + 0xac8b2d36eed2dac5,0xe272467e3d222f3f, + 0xd7adf884aa879177,0x5b0ed81dcc6abb0f, + 0x86ccbb52ea94baea,0x98e947129fc2b4e9, + 0xa87fea27a539e9a5,0x3f2398d747b36224, + 0xd29fe4b18e88640e,0x8eec7f0d19a03aad, + 0x83a3eeeef9153e89,0x1953cf68300424ac, + 0xa48ceaaab75a8e2b,0x5fa8c3423c052dd7, + 0xcdb02555653131b6,0x3792f412cb06794d, + 0x808e17555f3ebf11,0xe2bbd88bbee40bd0, + 0xa0b19d2ab70e6ed6,0x5b6aceaeae9d0ec4, + 0xc8de047564d20a8b,0xf245825a5a445275, + 0xfb158592be068d2e,0xeed6e2f0f0d56712, + 0x9ced737bb6c4183d,0x55464dd69685606b, + 0xc428d05aa4751e4c,0xaa97e14c3c26b886, + 0xf53304714d9265df,0xd53dd99f4b3066a8, + 0x993fe2c6d07b7fab,0xe546a8038efe4029, + 0xbf8fdb78849a5f96,0xde98520472bdd033, + 0xef73d256a5c0f77c,0x963e66858f6d4440, + 0x95a8637627989aad,0xdde7001379a44aa8, + 0xbb127c53b17ec159,0x5560c018580d5d52, + 0xe9d71b689dde71af,0xaab8f01e6e10b4a6, + 0x9226712162ab070d,0xcab3961304ca70e8, + 0xb6b00d69bb55c8d1,0x3d607b97c5fd0d22, + 0xe45c10c42a2b3b05,0x8cb89a7db77c506a, + 0x8eb98a7a9a5b04e3,0x77f3608e92adb242, + 0xb267ed1940f1c61c,0x55f038b237591ed3, + 0xdf01e85f912e37a3,0x6b6c46dec52f6688, + 0x8b61313bbabce2c6,0x2323ac4b3b3da015, + 0xae397d8aa96c1b77,0xabec975e0a0d081a, + 0xd9c7dced53c72255,0x96e7bd358c904a21, + 0x881cea14545c7575,0x7e50d64177da2e54, + 0xaa242499697392d2,0xdde50bd1d5d0b9e9, + 0xd4ad2dbfc3d07787,0x955e4ec64b44e864, + 0x84ec3c97da624ab4,0xbd5af13bef0b113e, + 0xa6274bbdd0fadd61,0xecb1ad8aeacdd58e, + 0xcfb11ead453994ba,0x67de18eda5814af2, + 0x81ceb32c4b43fcf4,0x80eacf948770ced7, + 0xa2425ff75e14fc31,0xa1258379a94d028d, + 0xcad2f7f5359a3b3e,0x96ee45813a04330, + 0xfd87b5f28300ca0d,0x8bca9d6e188853fc, + 0x9e74d1b791e07e48,0x775ea264cf55347e, + 0xc612062576589dda,0x95364afe032a81a0, + 0xf79687aed3eec551,0x3a83ddbd83f52210, + 0x9abe14cd44753b52,0xc4926a9672793580, + 0xc16d9a0095928a27,0x75b7053c0f178400, + 0xf1c90080baf72cb1,0x5324c68b12dd6800, + 0x971da05074da7bee,0xd3f6fc16ebca8000, + 0xbce5086492111aea,0x88f4bb1ca6bd0000, + 0xec1e4a7db69561a5,0x2b31e9e3d0700000, + 0x9392ee8e921d5d07,0x3aff322e62600000, + 0xb877aa3236a4b449,0x9befeb9fad487c3, + 0xe69594bec44de15b,0x4c2ebe687989a9b4, + 0x901d7cf73ab0acd9,0xf9d37014bf60a11, + 0xb424dc35095cd80f,0x538484c19ef38c95, + 0xe12e13424bb40e13,0x2865a5f206b06fba, + 0x8cbccc096f5088cb,0xf93f87b7442e45d4, + 0xafebff0bcb24aafe,0xf78f69a51539d749, + 0xdbe6fecebdedd5be,0xb573440e5a884d1c, + 0x89705f4136b4a597,0x31680a88f8953031, + 0xabcc77118461cefc,0xfdc20d2b36ba7c3e, + 0xd6bf94d5e57a42bc,0x3d32907604691b4d, + 0x8637bd05af6c69b5,0xa63f9a49c2c1b110, + 0xa7c5ac471b478423,0xfcf80dc33721d54, + 0xd1b71758e219652b,0xd3c36113404ea4a9, + 0x83126e978d4fdf3b,0x645a1cac083126ea, + 0xa3d70a3d70a3d70a,0x3d70a3d70a3d70a4, + 0xcccccccccccccccc,0xcccccccccccccccd, + 0x8000000000000000,0x0, + 0xa000000000000000,0x0, + 0xc800000000000000,0x0, + 0xfa00000000000000,0x0, + 0x9c40000000000000,0x0, + 0xc350000000000000,0x0, + 0xf424000000000000,0x0, + 0x9896800000000000,0x0, + 0xbebc200000000000,0x0, + 0xee6b280000000000,0x0, + 0x9502f90000000000,0x0, + 0xba43b74000000000,0x0, + 0xe8d4a51000000000,0x0, + 0x9184e72a00000000,0x0, + 0xb5e620f480000000,0x0, + 0xe35fa931a0000000,0x0, + 0x8e1bc9bf04000000,0x0, + 0xb1a2bc2ec5000000,0x0, + 0xde0b6b3a76400000,0x0, + 0x8ac7230489e80000,0x0, + 0xad78ebc5ac620000,0x0, + 0xd8d726b7177a8000,0x0, + 0x878678326eac9000,0x0, + 0xa968163f0a57b400,0x0, + 0xd3c21bcecceda100,0x0, + 0x84595161401484a0,0x0, + 0xa56fa5b99019a5c8,0x0, + 0xcecb8f27f4200f3a,0x0, + 0x813f3978f8940984,0x4000000000000000, + 0xa18f07d736b90be5,0x5000000000000000, + 0xc9f2c9cd04674ede,0xa400000000000000, + 0xfc6f7c4045812296,0x4d00000000000000, + 0x9dc5ada82b70b59d,0xf020000000000000, + 0xc5371912364ce305,0x6c28000000000000, + 0xf684df56c3e01bc6,0xc732000000000000, + 0x9a130b963a6c115c,0x3c7f400000000000, + 0xc097ce7bc90715b3,0x4b9f100000000000, + 0xf0bdc21abb48db20,0x1e86d40000000000, + 0x96769950b50d88f4,0x1314448000000000, + 0xbc143fa4e250eb31,0x17d955a000000000, + 0xeb194f8e1ae525fd,0x5dcfab0800000000, + 0x92efd1b8d0cf37be,0x5aa1cae500000000, + 0xb7abc627050305ad,0xf14a3d9e40000000, + 0xe596b7b0c643c719,0x6d9ccd05d0000000, + 0x8f7e32ce7bea5c6f,0xe4820023a2000000, + 0xb35dbf821ae4f38b,0xdda2802c8a800000, + 0xe0352f62a19e306e,0xd50b2037ad200000, + 0x8c213d9da502de45,0x4526f422cc340000, + 0xaf298d050e4395d6,0x9670b12b7f410000, + 0xdaf3f04651d47b4c,0x3c0cdd765f114000, + 0x88d8762bf324cd0f,0xa5880a69fb6ac800, + 0xab0e93b6efee0053,0x8eea0d047a457a00, + 0xd5d238a4abe98068,0x72a4904598d6d880, + 0x85a36366eb71f041,0x47a6da2b7f864750, + 0xa70c3c40a64e6c51,0x999090b65f67d924, + 0xd0cf4b50cfe20765,0xfff4b4e3f741cf6d, + 0x82818f1281ed449f,0xbff8f10e7a8921a4, + 0xa321f2d7226895c7,0xaff72d52192b6a0d, + 0xcbea6f8ceb02bb39,0x9bf4f8a69f764490, + 0xfee50b7025c36a08,0x2f236d04753d5b4, + 0x9f4f2726179a2245,0x1d762422c946590, + 0xc722f0ef9d80aad6,0x424d3ad2b7b97ef5, + 0xf8ebad2b84e0d58b,0xd2e0898765a7deb2, + 0x9b934c3b330c8577,0x63cc55f49f88eb2f, + 0xc2781f49ffcfa6d5,0x3cbf6b71c76b25fb, + 0xf316271c7fc3908a,0x8bef464e3945ef7a, + 0x97edd871cfda3a56,0x97758bf0e3cbb5ac, + 0xbde94e8e43d0c8ec,0x3d52eeed1cbea317, + 0xed63a231d4c4fb27,0x4ca7aaa863ee4bdd, + 0x945e455f24fb1cf8,0x8fe8caa93e74ef6a, + 0xb975d6b6ee39e436,0xb3e2fd538e122b44, + 0xe7d34c64a9c85d44,0x60dbbca87196b616, + 0x90e40fbeea1d3a4a,0xbc8955e946fe31cd, + 0xb51d13aea4a488dd,0x6babab6398bdbe41, + 0xe264589a4dcdab14,0xc696963c7eed2dd1, + 0x8d7eb76070a08aec,0xfc1e1de5cf543ca2, + 0xb0de65388cc8ada8,0x3b25a55f43294bcb, + 0xdd15fe86affad912,0x49ef0eb713f39ebe, + 0x8a2dbf142dfcc7ab,0x6e3569326c784337, + 0xacb92ed9397bf996,0x49c2c37f07965404, + 0xd7e77a8f87daf7fb,0xdc33745ec97be906, + 0x86f0ac99b4e8dafd,0x69a028bb3ded71a3, + 0xa8acd7c0222311bc,0xc40832ea0d68ce0c, + 0xd2d80db02aabd62b,0xf50a3fa490c30190, + 0x83c7088e1aab65db,0x792667c6da79e0fa, + 0xa4b8cab1a1563f52,0x577001b891185938, + 0xcde6fd5e09abcf26,0xed4c0226b55e6f86, + 0x80b05e5ac60b6178,0x544f8158315b05b4, + 0xa0dc75f1778e39d6,0x696361ae3db1c721, + 0xc913936dd571c84c,0x3bc3a19cd1e38e9, + 0xfb5878494ace3a5f,0x4ab48a04065c723, + 0x9d174b2dcec0e47b,0x62eb0d64283f9c76, + 0xc45d1df942711d9a,0x3ba5d0bd324f8394, + 0xf5746577930d6500,0xca8f44ec7ee36479, + 0x9968bf6abbe85f20,0x7e998b13cf4e1ecb, + 0xbfc2ef456ae276e8,0x9e3fedd8c321a67e, + 0xefb3ab16c59b14a2,0xc5cfe94ef3ea101e, + 0x95d04aee3b80ece5,0xbba1f1d158724a12, + 0xbb445da9ca61281f,0x2a8a6e45ae8edc97, + 0xea1575143cf97226,0xf52d09d71a3293bd, + 0x924d692ca61be758,0x593c2626705f9c56, + 0xb6e0c377cfa2e12e,0x6f8b2fb00c77836c, + 0xe498f455c38b997a,0xb6dfb9c0f956447, + 0x8edf98b59a373fec,0x4724bd4189bd5eac, + 0xb2977ee300c50fe7,0x58edec91ec2cb657, + 0xdf3d5e9bc0f653e1,0x2f2967b66737e3ed, + 0x8b865b215899f46c,0xbd79e0d20082ee74, + 0xae67f1e9aec07187,0xecd8590680a3aa11, + 0xda01ee641a708de9,0xe80e6f4820cc9495, + 0x884134fe908658b2,0x3109058d147fdcdd, + 0xaa51823e34a7eede,0xbd4b46f0599fd415, + 0xd4e5e2cdc1d1ea96,0x6c9e18ac7007c91a, + 0x850fadc09923329e,0x3e2cf6bc604ddb0, + 0xa6539930bf6bff45,0x84db8346b786151c, + 0xcfe87f7cef46ff16,0xe612641865679a63, + 0x81f14fae158c5f6e,0x4fcb7e8f3f60c07e, + 0xa26da3999aef7749,0xe3be5e330f38f09d, + 0xcb090c8001ab551c,0x5cadf5bfd3072cc5, + 0xfdcb4fa002162a63,0x73d9732fc7c8f7f6, + 0x9e9f11c4014dda7e,0x2867e7fddcdd9afa, + 0xc646d63501a1511d,0xb281e1fd541501b8, + 0xf7d88bc24209a565,0x1f225a7ca91a4226, + 0x9ae757596946075f,0x3375788de9b06958, + 0xc1a12d2fc3978937,0x52d6b1641c83ae, + 0xf209787bb47d6b84,0xc0678c5dbd23a49a, + 0x9745eb4d50ce6332,0xf840b7ba963646e0, + 0xbd176620a501fbff,0xb650e5a93bc3d898, + 0xec5d3fa8ce427aff,0xa3e51f138ab4cebe, + 0x93ba47c980e98cdf,0xc66f336c36b10137, + 0xb8a8d9bbe123f017,0xb80b0047445d4184, + 0xe6d3102ad96cec1d,0xa60dc059157491e5, + 0x9043ea1ac7e41392,0x87c89837ad68db2f, + 0xb454e4a179dd1877,0x29babe4598c311fb, + 0xe16a1dc9d8545e94,0xf4296dd6fef3d67a, + 0x8ce2529e2734bb1d,0x1899e4a65f58660c, + 0xb01ae745b101e9e4,0x5ec05dcff72e7f8f, + 0xdc21a1171d42645d,0x76707543f4fa1f73, + 0x899504ae72497eba,0x6a06494a791c53a8, + 0xabfa45da0edbde69,0x487db9d17636892, + 0xd6f8d7509292d603,0x45a9d2845d3c42b6, + 0x865b86925b9bc5c2,0xb8a2392ba45a9b2, + 0xa7f26836f282b732,0x8e6cac7768d7141e, + 0xd1ef0244af2364ff,0x3207d795430cd926, + 0x8335616aed761f1f,0x7f44e6bd49e807b8, + 0xa402b9c5a8d3a6e7,0x5f16206c9c6209a6, + 0xcd036837130890a1,0x36dba887c37a8c0f, + 0x802221226be55a64,0xc2494954da2c9789, + 0xa02aa96b06deb0fd,0xf2db9baa10b7bd6c, + 0xc83553c5c8965d3d,0x6f92829494e5acc7, + 0xfa42a8b73abbf48c,0xcb772339ba1f17f9, + 0x9c69a97284b578d7,0xff2a760414536efb, + 0xc38413cf25e2d70d,0xfef5138519684aba, + 0xf46518c2ef5b8cd1,0x7eb258665fc25d69, + 0x98bf2f79d5993802,0xef2f773ffbd97a61, + 0xbeeefb584aff8603,0xaafb550ffacfd8fa, + 0xeeaaba2e5dbf6784,0x95ba2a53f983cf38, + 0x952ab45cfa97a0b2,0xdd945a747bf26183, + 0xba756174393d88df,0x94f971119aeef9e4, + 0xe912b9d1478ceb17,0x7a37cd5601aab85d, + 0x91abb422ccb812ee,0xac62e055c10ab33a, + 0xb616a12b7fe617aa,0x577b986b314d6009, + 0xe39c49765fdf9d94,0xed5a7e85fda0b80b, + 0x8e41ade9fbebc27d,0x14588f13be847307, + 0xb1d219647ae6b31c,0x596eb2d8ae258fc8, + 0xde469fbd99a05fe3,0x6fca5f8ed9aef3bb, + 0x8aec23d680043bee,0x25de7bb9480d5854, + 0xada72ccc20054ae9,0xaf561aa79a10ae6a, + 0xd910f7ff28069da4,0x1b2ba1518094da04, + 0x87aa9aff79042286,0x90fb44d2f05d0842, + 0xa99541bf57452b28,0x353a1607ac744a53, + 0xd3fa922f2d1675f2,0x42889b8997915ce8, + 0x847c9b5d7c2e09b7,0x69956135febada11, + 0xa59bc234db398c25,0x43fab9837e699095, + 0xcf02b2c21207ef2e,0x94f967e45e03f4bb, + 0x8161afb94b44f57d,0x1d1be0eebac278f5, + 0xa1ba1ba79e1632dc,0x6462d92a69731732, + 0xca28a291859bbf93,0x7d7b8f7503cfdcfe, + 0xfcb2cb35e702af78,0x5cda735244c3d43e, + 0x9defbf01b061adab,0x3a0888136afa64a7, + 0xc56baec21c7a1916,0x88aaa1845b8fdd0, + 0xf6c69a72a3989f5b,0x8aad549e57273d45, + 0x9a3c2087a63f6399,0x36ac54e2f678864b, + 0xc0cb28a98fcf3c7f,0x84576a1bb416a7dd, + 0xf0fdf2d3f3c30b9f,0x656d44a2a11c51d5, + 0x969eb7c47859e743,0x9f644ae5a4b1b325, + 0xbc4665b596706114,0x873d5d9f0dde1fee, + 0xeb57ff22fc0c7959,0xa90cb506d155a7ea, + 0x9316ff75dd87cbd8,0x9a7f12442d588f2, + 0xb7dcbf5354e9bece,0xc11ed6d538aeb2f, + 0xe5d3ef282a242e81,0x8f1668c8a86da5fa, + 0x8fa475791a569d10,0xf96e017d694487bc, + 0xb38d92d760ec4455,0x37c981dcc395a9ac, + 0xe070f78d3927556a,0x85bbe253f47b1417, + 0x8c469ab843b89562,0x93956d7478ccec8e, + 0xaf58416654a6babb,0x387ac8d1970027b2, + 0xdb2e51bfe9d0696a,0x6997b05fcc0319e, + 0x88fcf317f22241e2,0x441fece3bdf81f03, + 0xab3c2fddeeaad25a,0xd527e81cad7626c3, + 0xd60b3bd56a5586f1,0x8a71e223d8d3b074, + 0x85c7056562757456,0xf6872d5667844e49, + 0xa738c6bebb12d16c,0xb428f8ac016561db, + 0xd106f86e69d785c7,0xe13336d701beba52, + 0x82a45b450226b39c,0xecc0024661173473, + 0xa34d721642b06084,0x27f002d7f95d0190, + 0xcc20ce9bd35c78a5,0x31ec038df7b441f4, + 0xff290242c83396ce,0x7e67047175a15271, + 0x9f79a169bd203e41,0xf0062c6e984d386, + 0xc75809c42c684dd1,0x52c07b78a3e60868, + 0xf92e0c3537826145,0xa7709a56ccdf8a82, + 0x9bbcc7a142b17ccb,0x88a66076400bb691, + 0xc2abf989935ddbfe,0x6acff893d00ea435, + 0xf356f7ebf83552fe,0x583f6b8c4124d43, + 0x98165af37b2153de,0xc3727a337a8b704a, + 0xbe1bf1b059e9a8d6,0x744f18c0592e4c5c, + 0xeda2ee1c7064130c,0x1162def06f79df73, + 0x9485d4d1c63e8be7,0x8addcb5645ac2ba8, + 0xb9a74a0637ce2ee1,0x6d953e2bd7173692, + 0xe8111c87c5c1ba99,0xc8fa8db6ccdd0437, + 0x910ab1d4db9914a0,0x1d9c9892400a22a2, + 0xb54d5e4a127f59c8,0x2503beb6d00cab4b, + 0xe2a0b5dc971f303a,0x2e44ae64840fd61d, + 0x8da471a9de737e24,0x5ceaecfed289e5d2, + 0xb10d8e1456105dad,0x7425a83e872c5f47, + 0xdd50f1996b947518,0xd12f124e28f77719, + 0x8a5296ffe33cc92f,0x82bd6b70d99aaa6f, + 0xace73cbfdc0bfb7b,0x636cc64d1001550b, + 0xd8210befd30efa5a,0x3c47f7e05401aa4e, + 0x8714a775e3e95c78,0x65acfaec34810a71, + 0xa8d9d1535ce3b396,0x7f1839a741a14d0d, + 0xd31045a8341ca07c,0x1ede48111209a050, + 0x83ea2b892091e44d,0x934aed0aab460432, + 0xa4e4b66b68b65d60,0xf81da84d5617853f, + 0xce1de40642e3f4b9,0x36251260ab9d668e, + 0x80d2ae83e9ce78f3,0xc1d72b7c6b426019, + 0xa1075a24e4421730,0xb24cf65b8612f81f, + 0xc94930ae1d529cfc,0xdee033f26797b627, + 0xfb9b7cd9a4a7443c,0x169840ef017da3b1, + 0x9d412e0806e88aa5,0x8e1f289560ee864e, + 0xc491798a08a2ad4e,0xf1a6f2bab92a27e2, + 0xf5b5d7ec8acb58a2,0xae10af696774b1db, + 0x9991a6f3d6bf1765,0xacca6da1e0a8ef29, + 0xbff610b0cc6edd3f,0x17fd090a58d32af3, + 0xeff394dcff8a948e,0xddfc4b4cef07f5b0, + 0x95f83d0a1fb69cd9,0x4abdaf101564f98e, + 0xbb764c4ca7a4440f,0x9d6d1ad41abe37f1, + 0xea53df5fd18d5513,0x84c86189216dc5ed, + 0x92746b9be2f8552c,0x32fd3cf5b4e49bb4, + 0xb7118682dbb66a77,0x3fbc8c33221dc2a1, + 0xe4d5e82392a40515,0xfabaf3feaa5334a, + 0x8f05b1163ba6832d,0x29cb4d87f2a7400e, + 0xb2c71d5bca9023f8,0x743e20e9ef511012, + 0xdf78e4b2bd342cf6,0x914da9246b255416, + 0x8bab8eefb6409c1a,0x1ad089b6c2f7548e, + 0xae9672aba3d0c320,0xa184ac2473b529b1, + 0xda3c0f568cc4f3e8,0xc9e5d72d90a2741e, + 0x8865899617fb1871,0x7e2fa67c7a658892, + 0xaa7eebfb9df9de8d,0xddbb901b98feeab7, + 0xd51ea6fa85785631,0x552a74227f3ea565, + 0x8533285c936b35de,0xd53a88958f87275f, + 0xa67ff273b8460356,0x8a892abaf368f137, + 0xd01fef10a657842c,0x2d2b7569b0432d85, + 0x8213f56a67f6b29b,0x9c3b29620e29fc73, + 0xa298f2c501f45f42,0x8349f3ba91b47b8f, + 0xcb3f2f7642717713,0x241c70a936219a73, + 0xfe0efb53d30dd4d7,0xed238cd383aa0110, + 0x9ec95d1463e8a506,0xf4363804324a40aa, + 0xc67bb4597ce2ce48,0xb143c6053edcd0d5, + 0xf81aa16fdc1b81da,0xdd94b7868e94050a, + 0x9b10a4e5e9913128,0xca7cf2b4191c8326, + 0xc1d4ce1f63f57d72,0xfd1c2f611f63a3f0, + 0xf24a01a73cf2dccf,0xbc633b39673c8cec, + 0x976e41088617ca01,0xd5be0503e085d813, + 0xbd49d14aa79dbc82,0x4b2d8644d8a74e18, + 0xec9c459d51852ba2,0xddf8e7d60ed1219e, + 0x93e1ab8252f33b45,0xcabb90e5c942b503, + 0xb8da1662e7b00a17,0x3d6a751f3b936243, + 0xe7109bfba19c0c9d,0xcc512670a783ad4, + 0x906a617d450187e2,0x27fb2b80668b24c5, + 0xb484f9dc9641e9da,0xb1f9f660802dedf6, + 0xe1a63853bbd26451,0x5e7873f8a0396973, + 0x8d07e33455637eb2,0xdb0b487b6423e1e8, + 0xb049dc016abc5e5f,0x91ce1a9a3d2cda62, + 0xdc5c5301c56b75f7,0x7641a140cc7810fb, + 0x89b9b3e11b6329ba,0xa9e904c87fcb0a9d, + 0xac2820d9623bf429,0x546345fa9fbdcd44, + 0xd732290fbacaf133,0xa97c177947ad4095, + 0x867f59a9d4bed6c0,0x49ed8eabcccc485d, + 0xa81f301449ee8c70,0x5c68f256bfff5a74, + 0xd226fc195c6a2f8c,0x73832eec6fff3111, + 0x83585d8fd9c25db7,0xc831fd53c5ff7eab, + 0xa42e74f3d032f525,0xba3e7ca8b77f5e55, + 0xcd3a1230c43fb26f,0x28ce1bd2e55f35eb, + 0x80444b5e7aa7cf85,0x7980d163cf5b81b3, + 0xa0555e361951c366,0xd7e105bcc332621f, + 0xc86ab5c39fa63440,0x8dd9472bf3fefaa7, + 0xfa856334878fc150,0xb14f98f6f0feb951, + 0x9c935e00d4b9d8d2,0x6ed1bf9a569f33d3, + 0xc3b8358109e84f07,0xa862f80ec4700c8, + 0xf4a642e14c6262c8,0xcd27bb612758c0fa, + 0x98e7e9cccfbd7dbd,0x8038d51cb897789c, + 0xbf21e44003acdd2c,0xe0470a63e6bd56c3, + 0xeeea5d5004981478,0x1858ccfce06cac74, + 0x95527a5202df0ccb,0xf37801e0c43ebc8, + 0xbaa718e68396cffd,0xd30560258f54e6ba, + 0xe950df20247c83fd,0x47c6b82ef32a2069, + 0x91d28b7416cdd27e,0x4cdc331d57fa5441, + 0xb6472e511c81471d,0xe0133fe4adf8e952, + 0xe3d8f9e563a198e5,0x58180fddd97723a6, + 0x8e679c2f5e44ff8f,0x570f09eaa7ea7648,}; +}; +#endif // SIMDJSON_STATIC_REFLECTION + +extern SIMDJSON_DLLIMPORTEXPORT const uint64_t power_of_five_128[651*2]; } // namespace internal } // namespace simdjson @@ -10592,6 +12128,12 @@ namespace { tmp = vpaddq_u8(tmp, tmp); return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); } + // Returns 4-bit out of each byte, alternating between the high 4 bits and low + // bits result it is 64 bit. + simdjson_inline uint64_t to_bitmask64() const { + return vget_lane_u64( + vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(*this), 4)), 0); + } simdjson_inline bool any() const { return vmaxvq_u32(vreinterpretq_u32_u8(*this)) != 0; } }; @@ -10668,7 +12210,7 @@ namespace { // Bit-specific operations simdjson_inline simd8 any_bits_set(simd8 bits) const { return vtstq_u8(*this, bits); } - simdjson_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; } + simdjson_inline bool any_bits_set_anywhere() const { return vmaxvq_u32(vreinterpretq_u32_u8(*this)) != 0; } simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return (*this & bits).any_bits_set_anywhere(); } template simdjson_inline simd8 shr() const { return vshrq_n_u8(*this, N); } @@ -10681,7 +12223,12 @@ namespace { return lookup_table.apply_lookup_16_to(*this); } - + // Returns 4-bit out of each byte, alternating between the high 4 bits and low + // bits result it is 64 bit. + simdjson_inline uint64_t to_bitmask64() const { + return vget_lane_u64( + vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(*this), 4)), 0); + } // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). // Passing a 0 value for mask would be equivalent to writing out every byte to output. // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes @@ -10975,7 +12522,7 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return bs_bits != 0; } @@ -11004,6 +12551,32 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits) / 4; } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + (is_backslash | is_quote | is_control).to_bitmask64() + }; +} + + + } // unnamed namespace } // namespace arm64 } // namespace simdjson @@ -11423,12 +12996,17 @@ struct implementation_simdjson_result_base { * * @param value The variable to assign the value to. May not be set if there is an error. */ - simdjson_inline error_code get(T &value) && noexcept; + simdjson_warn_unused simdjson_inline error_code get(T &value) && noexcept; /** * The error. */ - simdjson_inline error_code error() const noexcept; + simdjson_warn_unused simdjson_inline error_code error() const noexcept; + + /** + * Whether there is a value. + */ + simdjson_warn_unused simdjson_inline bool has_value() const noexcept; #if SIMDJSON_EXCEPTIONS @@ -11437,6 +13015,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -11478,6 +13066,10 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + + using value_type = T; + using error_type = error_code; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -11660,7 +13252,12 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index]); +#else simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); +#endif + // Both i and power_of_five_128[index] have their most significant bit set to 1 which // implies that the either the most or the second most significant bit of the product // is 1. We pack values in this manner for efficiency reasons: it maximizes the use @@ -11693,7 +13290,11 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index + 1]); +#else simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); +#endif firstproduct.low += secondproduct.high; if(secondproduct.high > firstproduct.low) { firstproduct.high++; } // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without @@ -11854,7 +13455,7 @@ simdjson_inline bool is_digit(const uint8_t c) { return static_cast(c - '0') <= 9; } -simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the @@ -11882,7 +13483,7 @@ simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const u return SUCCESS; } -simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well @@ -11971,7 +13572,7 @@ static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double /** @private */ template -simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { +simdjson_warn_unused simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. @@ -12034,13 +13635,13 @@ simdjson_inline error_code write_float(const uint8_t *const src, bool negative, // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); // for performance analysis, it is sometimes useful to skip parsing #ifdef SIMDJSON_SKIPNUMBERPARSING template -simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } @@ -12066,7 +13667,7 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // // Check for minus sign // @@ -12140,7 +13741,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -12601,6 +14211,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -12835,12 +14451,41 @@ simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_b } template -simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { return this->second; } + +template +simdjson_warn_unused simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -13009,7 +14654,7 @@ namespace { struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 1; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return c == '"'; } simdjson_inline bool has_backslash() { return c == '\\'; } @@ -13025,6 +14670,24 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin return { src[0] }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 1; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits; } + simdjson_inline int escape_index() { return 0; } + + bool escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + dst[0] = src[0]; + return { (src[0] == '\\') || (src[0] == '"') || (src[0] < 32) }; +} + } // unnamed namespace } // namespace fallback } // namespace simdjson @@ -13531,12 +15194,17 @@ struct implementation_simdjson_result_base { * * @param value The variable to assign the value to. May not be set if there is an error. */ - simdjson_inline error_code get(T &value) && noexcept; + simdjson_warn_unused simdjson_inline error_code get(T &value) && noexcept; /** * The error. */ - simdjson_inline error_code error() const noexcept; + simdjson_warn_unused simdjson_inline error_code error() const noexcept; + + /** + * Whether there is a value. + */ + simdjson_warn_unused simdjson_inline bool has_value() const noexcept; #if SIMDJSON_EXCEPTIONS @@ -13545,6 +15213,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -13586,6 +15264,10 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + + using value_type = T; + using error_type = error_code; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -13768,7 +15450,12 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index]); +#else simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); +#endif + // Both i and power_of_five_128[index] have their most significant bit set to 1 which // implies that the either the most or the second most significant bit of the product // is 1. We pack values in this manner for efficiency reasons: it maximizes the use @@ -13801,7 +15488,11 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index + 1]); +#else simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); +#endif firstproduct.low += secondproduct.high; if(secondproduct.high > firstproduct.low) { firstproduct.high++; } // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without @@ -13962,7 +15653,7 @@ simdjson_inline bool is_digit(const uint8_t c) { return static_cast(c - '0') <= 9; } -simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the @@ -13990,7 +15681,7 @@ simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const u return SUCCESS; } -simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well @@ -14079,7 +15770,7 @@ static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double /** @private */ template -simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { +simdjson_warn_unused simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. @@ -14142,13 +15833,13 @@ simdjson_inline error_code write_float(const uint8_t *const src, bool negative, // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); // for performance analysis, it is sometimes useful to skip parsing #ifdef SIMDJSON_SKIPNUMBERPARSING template -simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } @@ -14174,7 +15865,7 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // // Check for minus sign // @@ -14248,7 +15939,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -14709,6 +16409,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -14943,12 +16649,41 @@ simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_b } template -simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { return this->second; } + +template +simdjson_warn_unused simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -15690,7 +17425,7 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } @@ -15714,6 +17449,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + uint64_t((is_backslash | is_quote | is_control).to_bitmask()) + }; +} + } // unnamed namespace } // namespace haswell } // namespace simdjson @@ -16131,12 +17891,17 @@ struct implementation_simdjson_result_base { * * @param value The variable to assign the value to. May not be set if there is an error. */ - simdjson_inline error_code get(T &value) && noexcept; + simdjson_warn_unused simdjson_inline error_code get(T &value) && noexcept; /** * The error. */ - simdjson_inline error_code error() const noexcept; + simdjson_warn_unused simdjson_inline error_code error() const noexcept; + + /** + * Whether there is a value. + */ + simdjson_warn_unused simdjson_inline bool has_value() const noexcept; #if SIMDJSON_EXCEPTIONS @@ -16145,6 +17910,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -16186,6 +17961,10 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + + using value_type = T; + using error_type = error_code; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -16368,7 +18147,12 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index]); +#else simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); +#endif + // Both i and power_of_five_128[index] have their most significant bit set to 1 which // implies that the either the most or the second most significant bit of the product // is 1. We pack values in this manner for efficiency reasons: it maximizes the use @@ -16401,7 +18185,11 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index + 1]); +#else simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); +#endif firstproduct.low += secondproduct.high; if(secondproduct.high > firstproduct.low) { firstproduct.high++; } // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without @@ -16562,7 +18350,7 @@ simdjson_inline bool is_digit(const uint8_t c) { return static_cast(c - '0') <= 9; } -simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the @@ -16590,7 +18378,7 @@ simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const u return SUCCESS; } -simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well @@ -16679,7 +18467,7 @@ static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double /** @private */ template -simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { +simdjson_warn_unused simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. @@ -16742,13 +18530,13 @@ simdjson_inline error_code write_float(const uint8_t *const src, bool negative, // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); // for performance analysis, it is sometimes useful to skip parsing #ifdef SIMDJSON_SKIPNUMBERPARSING template -simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } @@ -16774,7 +18562,7 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // // Check for minus sign // @@ -16848,7 +18636,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -17309,6 +19106,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -17543,12 +19346,41 @@ simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_b } template -simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { return this->second; } + +template +simdjson_warn_unused simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -17908,7 +19740,6 @@ namespace simd { friend simdjson_really_inline uint64_t operator==(const simd8 lhs, const simd8 rhs) { return _mm512_cmpeq_epi8_mask(lhs, rhs); } - static const int SIZE = sizeof(base::value); template @@ -18227,7 +20058,7 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 64; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } @@ -18251,6 +20082,35 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 64; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(uint64_t(escape_bits)); } + + __mmask64 escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + __mmask64 is_quote = _mm512_cmpeq_epi8_mask(v, _mm512_set1_epi8('"')); + __mmask64 is_backslash = _mm512_cmpeq_epi8_mask(v, _mm512_set1_epi8('\\')); + __mmask64 is_control = _mm512_cmplt_epi8_mask(v, _mm512_set1_epi8(32)); + return { + (is_backslash | is_quote | is_control) + }; +} + + + + } // unnamed namespace } // namespace icelake } // namespace simdjson @@ -18728,12 +20588,17 @@ struct implementation_simdjson_result_base { * * @param value The variable to assign the value to. May not be set if there is an error. */ - simdjson_inline error_code get(T &value) && noexcept; + simdjson_warn_unused simdjson_inline error_code get(T &value) && noexcept; /** * The error. */ - simdjson_inline error_code error() const noexcept; + simdjson_warn_unused simdjson_inline error_code error() const noexcept; + + /** + * Whether there is a value. + */ + simdjson_warn_unused simdjson_inline bool has_value() const noexcept; #if SIMDJSON_EXCEPTIONS @@ -18742,6 +20607,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -18783,6 +20658,10 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + + using value_type = T; + using error_type = error_code; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -18965,7 +20844,12 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index]); +#else simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); +#endif + // Both i and power_of_five_128[index] have their most significant bit set to 1 which // implies that the either the most or the second most significant bit of the product // is 1. We pack values in this manner for efficiency reasons: it maximizes the use @@ -18998,7 +20882,11 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index + 1]); +#else simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); +#endif firstproduct.low += secondproduct.high; if(secondproduct.high > firstproduct.low) { firstproduct.high++; } // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without @@ -19159,7 +21047,7 @@ simdjson_inline bool is_digit(const uint8_t c) { return static_cast(c - '0') <= 9; } -simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the @@ -19187,7 +21075,7 @@ simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const u return SUCCESS; } -simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well @@ -19276,7 +21164,7 @@ static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double /** @private */ template -simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { +simdjson_warn_unused simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. @@ -19339,13 +21227,13 @@ simdjson_inline error_code write_float(const uint8_t *const src, bool negative, // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); // for performance analysis, it is sometimes useful to skip parsing #ifdef SIMDJSON_SKIPNUMBERPARSING template -simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } @@ -19371,7 +21259,7 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // // Check for minus sign // @@ -19445,7 +21333,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -19906,6 +21803,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -20140,12 +22043,41 @@ simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_b } template -simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { return this->second; } + +template +simdjson_warn_unused simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -20982,7 +22914,7 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { @@ -21023,6 +22955,32 @@ backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + // We store it as a 64-bit bitmask even though we only need 16 bits. + uint64_t((is_backslash | is_quote | is_control).to_bitmask()) + }; +} + } // unnamed namespace } // namespace ppc64 } // namespace simdjson @@ -21442,12 +23400,17 @@ struct implementation_simdjson_result_base { * * @param value The variable to assign the value to. May not be set if there is an error. */ - simdjson_inline error_code get(T &value) && noexcept; + simdjson_warn_unused simdjson_inline error_code get(T &value) && noexcept; /** * The error. */ - simdjson_inline error_code error() const noexcept; + simdjson_warn_unused simdjson_inline error_code error() const noexcept; + + /** + * Whether there is a value. + */ + simdjson_warn_unused simdjson_inline bool has_value() const noexcept; #if SIMDJSON_EXCEPTIONS @@ -21456,6 +23419,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -21497,6 +23470,10 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + + using value_type = T; + using error_type = error_code; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -21679,7 +23656,12 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index]); +#else simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); +#endif + // Both i and power_of_five_128[index] have their most significant bit set to 1 which // implies that the either the most or the second most significant bit of the product // is 1. We pack values in this manner for efficiency reasons: it maximizes the use @@ -21712,7 +23694,11 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index + 1]); +#else simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); +#endif firstproduct.low += secondproduct.high; if(secondproduct.high > firstproduct.low) { firstproduct.high++; } // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without @@ -21873,7 +23859,7 @@ simdjson_inline bool is_digit(const uint8_t c) { return static_cast(c - '0') <= 9; } -simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the @@ -21901,7 +23887,7 @@ simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const u return SUCCESS; } -simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well @@ -21990,7 +23976,7 @@ static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double /** @private */ template -simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { +simdjson_warn_unused simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. @@ -22053,13 +24039,13 @@ simdjson_inline error_code write_float(const uint8_t *const src, bool negative, // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); // for performance analysis, it is sometimes useful to skip parsing #ifdef SIMDJSON_SKIPNUMBERPARSING template -simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } @@ -22085,7 +24071,7 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // // Check for minus sign // @@ -22159,7 +24145,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -22620,6 +24615,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -22854,12 +24855,41 @@ simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_b } template -simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { return this->second; } + +template +simdjson_warn_unused simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -24030,7 +26060,7 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return bs_bits != 0; } @@ -24056,6 +26086,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + uint64_t((is_backslash | is_quote | is_control).to_bitmask()) + }; +} + } // unnamed namespace } // namespace westmere } // namespace simdjson @@ -24473,12 +26528,17 @@ struct implementation_simdjson_result_base { * * @param value The variable to assign the value to. May not be set if there is an error. */ - simdjson_inline error_code get(T &value) && noexcept; + simdjson_warn_unused simdjson_inline error_code get(T &value) && noexcept; /** * The error. */ - simdjson_inline error_code error() const noexcept; + simdjson_warn_unused simdjson_inline error_code error() const noexcept; + + /** + * Whether there is a value. + */ + simdjson_warn_unused simdjson_inline bool has_value() const noexcept; #if SIMDJSON_EXCEPTIONS @@ -24487,6 +26547,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -24528,6 +26598,10 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + + using value_type = T; + using error_type = error_code; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -24710,7 +26784,12 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index]); +#else simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); +#endif + // Both i and power_of_five_128[index] have their most significant bit set to 1 which // implies that the either the most or the second most significant bit of the product // is 1. We pack values in this manner for efficiency reasons: it maximizes the use @@ -24743,7 +26822,11 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index + 1]); +#else simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); +#endif firstproduct.low += secondproduct.high; if(secondproduct.high > firstproduct.low) { firstproduct.high++; } // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without @@ -24904,7 +26987,7 @@ simdjson_inline bool is_digit(const uint8_t c) { return static_cast(c - '0') <= 9; } -simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the @@ -24932,7 +27015,7 @@ simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const u return SUCCESS; } -simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well @@ -25021,7 +27104,7 @@ static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double /** @private */ template -simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { +simdjson_warn_unused simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. @@ -25084,13 +27167,13 @@ simdjson_inline error_code write_float(const uint8_t *const src, bool negative, // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); // for performance analysis, it is sometimes useful to skip parsing #ifdef SIMDJSON_SKIPNUMBERPARSING template -simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } @@ -25116,7 +27199,7 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // // Check for minus sign // @@ -25190,7 +27273,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -25651,6 +27743,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -25885,12 +27983,41 @@ simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_b } template -simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { return this->second; } + +template +simdjson_warn_unused simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -26533,7 +28660,7 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return bs_bits != 0; } @@ -26562,6 +28689,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + static_cast((is_backslash | is_quote | is_control).to_bitmask()) + }; +} + } // unnamed namespace } // namespace lsx } // namespace simdjson @@ -26981,12 +29133,17 @@ struct implementation_simdjson_result_base { * * @param value The variable to assign the value to. May not be set if there is an error. */ - simdjson_inline error_code get(T &value) && noexcept; + simdjson_warn_unused simdjson_inline error_code get(T &value) && noexcept; /** * The error. */ - simdjson_inline error_code error() const noexcept; + simdjson_warn_unused simdjson_inline error_code error() const noexcept; + + /** + * Whether there is a value. + */ + simdjson_warn_unused simdjson_inline bool has_value() const noexcept; #if SIMDJSON_EXCEPTIONS @@ -26995,6 +29152,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -27036,6 +29203,10 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + + using value_type = T; + using error_type = error_code; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -27218,7 +29389,12 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index]); +#else simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); +#endif + // Both i and power_of_five_128[index] have their most significant bit set to 1 which // implies that the either the most or the second most significant bit of the product // is 1. We pack values in this manner for efficiency reasons: it maximizes the use @@ -27251,7 +29427,11 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index + 1]); +#else simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); +#endif firstproduct.low += secondproduct.high; if(secondproduct.high > firstproduct.low) { firstproduct.high++; } // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without @@ -27412,7 +29592,7 @@ simdjson_inline bool is_digit(const uint8_t c) { return static_cast(c - '0') <= 9; } -simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the @@ -27440,7 +29620,7 @@ simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const u return SUCCESS; } -simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well @@ -27529,7 +29709,7 @@ static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double /** @private */ template -simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { +simdjson_warn_unused simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. @@ -27592,13 +29772,13 @@ simdjson_inline error_code write_float(const uint8_t *const src, bool negative, // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); // for performance analysis, it is sometimes useful to skip parsing #ifdef SIMDJSON_SKIPNUMBERPARSING template -simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } @@ -27624,7 +29804,7 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // // Check for minus sign // @@ -27698,7 +29878,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -28159,6 +30348,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -28393,12 +30588,41 @@ simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_b } template -simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { return this->second; } + +template +simdjson_warn_unused simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -29060,7 +31284,7 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return bs_bits != 0; } @@ -29083,6 +31307,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + (is_backslash | is_quote | is_control).to_bitmask() + }; +} + } // unnamed namespace } // namespace lasx } // namespace simdjson @@ -29502,12 +31751,17 @@ struct implementation_simdjson_result_base { * * @param value The variable to assign the value to. May not be set if there is an error. */ - simdjson_inline error_code get(T &value) && noexcept; + simdjson_warn_unused simdjson_inline error_code get(T &value) && noexcept; /** * The error. */ - simdjson_inline error_code error() const noexcept; + simdjson_warn_unused simdjson_inline error_code error() const noexcept; + + /** + * Whether there is a value. + */ + simdjson_warn_unused simdjson_inline bool has_value() const noexcept; #if SIMDJSON_EXCEPTIONS @@ -29516,6 +31770,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -29557,6 +31821,10 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + + using value_type = T; + using error_type = error_code; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -29739,7 +32007,12 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index]); +#else simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); +#endif + // Both i and power_of_five_128[index] have their most significant bit set to 1 which // implies that the either the most or the second most significant bit of the product // is 1. We pack values in this manner for efficiency reasons: it maximizes the use @@ -29772,7 +32045,11 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // with a returned value of type value128 with a "low component" corresponding to the // 64-bit least significant bits of the product and with a "high component" corresponding // to the 64-bit most significant bits of the product. +#if SIMDJSON_STATIC_REFLECTION + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::powers_template<>::power_of_five_128[index + 1]); +#else simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); +#endif firstproduct.low += secondproduct.high; if(secondproduct.high > firstproduct.low) { firstproduct.high++; } // As it has been proven by Noble Mushtak and Daniel Lemire in "Fast Number Parsing Without @@ -29933,7 +32210,7 @@ simdjson_inline bool is_digit(const uint8_t c) { return static_cast(c - '0') <= 9; } -simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the @@ -29961,7 +32238,7 @@ simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const u return SUCCESS; } -simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { +simdjson_warn_unused simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well @@ -30050,7 +32327,7 @@ static error_code slow_float_parsing(simdjson_unused const uint8_t * src, double /** @private */ template -simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { +simdjson_warn_unused simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. @@ -30113,13 +32390,13 @@ simdjson_inline error_code write_float(const uint8_t *const src, bool negative, // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer); // for performance analysis, it is sometimes useful to skip parsing #ifdef SIMDJSON_SKIPNUMBERPARSING template -simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } @@ -30145,7 +32422,7 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { +simdjson_warn_unused simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // // Check for minus sign // @@ -30219,7 +32496,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -30680,6 +32966,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -30914,12 +33206,41 @@ simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_b } template -simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { return this->second; } + +template +simdjson_warn_unused simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -31011,6 +33332,7 @@ simdjson_inline implementation_simdjson_result_base::implementation_simdjson_ // Internal headers needed for ondemand generics. // All includes not under simdjson/generic/ondemand must be here! // Otherwise, amalgamation will fail. +/* skipped duplicate #include "simdjson/concepts.h" */ /* skipped duplicate #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ /* skipped duplicate #include "simdjson/implementation.h" */ /* skipped duplicate #include "simdjson/padded_string.h" */ @@ -31445,6 +33767,12 @@ namespace { tmp = vpaddq_u8(tmp, tmp); return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); } + // Returns 4-bit out of each byte, alternating between the high 4 bits and low + // bits result it is 64 bit. + simdjson_inline uint64_t to_bitmask64() const { + return vget_lane_u64( + vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(*this), 4)), 0); + } simdjson_inline bool any() const { return vmaxvq_u32(vreinterpretq_u32_u8(*this)) != 0; } }; @@ -31521,7 +33849,7 @@ namespace { // Bit-specific operations simdjson_inline simd8 any_bits_set(simd8 bits) const { return vtstq_u8(*this, bits); } - simdjson_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; } + simdjson_inline bool any_bits_set_anywhere() const { return vmaxvq_u32(vreinterpretq_u32_u8(*this)) != 0; } simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return (*this & bits).any_bits_set_anywhere(); } template simdjson_inline simd8 shr() const { return vshrq_n_u8(*this, N); } @@ -31534,7 +33862,12 @@ namespace { return lookup_table.apply_lookup_16_to(*this); } - + // Returns 4-bit out of each byte, alternating between the high 4 bits and low + // bits result it is 64 bit. + simdjson_inline uint64_t to_bitmask64() const { + return vget_lane_u64( + vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(*this), 4)), 0); + } // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). // Passing a 0 value for mask would be equivalent to writing out every byte to output. // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes @@ -31828,7 +34161,7 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return bs_bits != 0; } @@ -31857,6 +34190,32 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits) / 4; } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + (is_backslash | is_quote | is_control).to_bitmask64() + }; +} + + + } // unnamed namespace } // namespace arm64 } // namespace simdjson @@ -31925,7 +34284,7 @@ class value_iterator; /* end file simdjson/generic/ondemand/base.h for arm64 */ /* including simdjson/generic/ondemand/deserialize.h for arm64: #include "simdjson/generic/ondemand/deserialize.h" */ /* begin file simdjson/generic/ondemand/deserialize.h for arm64 */ -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS #ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -31934,55 +34293,8 @@ class value_iterator; /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include namespace simdjson { -namespace tag_invoke_fn_ns { -void tag_invoke(); - -struct tag_invoke_fn { - template - requires requires(Tag tag, Args &&...args) { - tag_invoke(std::forward(tag), std::forward(args)...); - } - constexpr auto operator()(Tag tag, Args &&...args) const - noexcept(noexcept(tag_invoke(std::forward(tag), - std::forward(args)...))) - -> decltype(tag_invoke(std::forward(tag), - std::forward(args)...)) { - return tag_invoke(std::forward(tag), std::forward(args)...); - } -}; -} // namespace tag_invoke_fn_ns - -inline namespace tag_invoke_ns { -inline constexpr tag_invoke_fn_ns::tag_invoke_fn tag_invoke = {}; -} // namespace tag_invoke_ns - -template -concept tag_invocable = requires(Tag tag, Args... args) { - tag_invoke(std::forward(tag), std::forward(args)...); -}; - -template -concept nothrow_tag_invocable = - tag_invocable && requires(Tag tag, Args... args) { - { - tag_invoke(std::forward(tag), std::forward(args)...) - } noexcept; - }; - -template -using tag_invoke_result = - std::invoke_result; - -template -using tag_invoke_result_t = - std::invoke_result_t; - -template using tag_t = std::decay_t; - - struct deserialize_tag; /// These types are deserializable in a built-in way @@ -32004,7 +34316,7 @@ template concept custom_deserializable = tag_invocable; template -concept deserializable = custom_deserializable || is_builtin_deserializable_v; +concept deserializable = custom_deserializable || is_builtin_deserializable_v || concepts::optional_type; template concept nothrow_custom_deserializable = nothrow_tag_invocable; @@ -32015,28 +34327,44 @@ concept nothrow_deserializable = nothrow_custom_deserializable || is_bu /// Deserialize Tag inline constexpr struct deserialize_tag { + using array_type = arm64::ondemand::array; + using object_type = arm64::ondemand::object; using value_type = arm64::ondemand::value; using document_type = arm64::ondemand::document; using document_reference_type = arm64::ondemand::document_reference; + // Customization Point for array + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(array_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for object + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(object_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + // Customization Point for value template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } // Customization Point for document template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } // Customization Point for document reference template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } @@ -32046,7 +34374,7 @@ inline constexpr struct deserialize_tag { } // namespace simdjson #endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS /* end file simdjson/generic/ondemand/deserialize.h for arm64 */ /* including simdjson/generic/ondemand/value_iterator.h for arm64: #include "simdjson/generic/ondemand/value_iterator.h" */ @@ -32388,7 +34716,7 @@ class value_iterator { simdjson_warn_unused simdjson_inline simdjson_result get_root_number(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result is_root_null(bool check_trailing) noexcept; - simdjson_inline error_code error() const noexcept; + simdjson_warn_unused simdjson_inline error_code error() const noexcept; simdjson_inline uint8_t *&string_buf_loc() noexcept; simdjson_inline const json_iterator &json_iter() const noexcept; simdjson_inline json_iterator &json_iter() noexcept; @@ -32472,8 +34800,8 @@ class value_iterator { simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; - simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; - simdjson_inline error_code end_container() noexcept; + simdjson_warn_unused simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container() noexcept; /** * Advance to a place expecting a value (increasing depth). @@ -32483,8 +34811,8 @@ class value_iterator { */ simdjson_inline simdjson_result advance_to_value() noexcept; - simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; - simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; + simdjson_warn_unused simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; + simdjson_warn_unused simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; simdjson_inline bool is_at_start() const noexcept; /** @@ -32521,7 +34849,7 @@ class value_iterator { /** @copydoc error_code json_iterator::end_position() const noexcept; */ simdjson_inline token_position end_position() const noexcept; /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ - simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + simdjson_warn_unused simdjson_inline error_code report_error(error_code error, const char *message) noexcept; friend class document; friend class object; @@ -32558,6 +34886,7 @@ struct simdjson_result : public arm64::implemen /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include @@ -32586,13 +34915,14 @@ class value { * * You may use get_double(), get_bool(), get_uint64(), get_int64(), * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * When SIMDJSON_SUPPORTS_CONCEPTS is set, custom types are also supported. * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ template simdjson_inline simdjson_result get() -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -32609,22 +34939,38 @@ class value { * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * If the macro SIMDJSON_SUPPORTS_CONCEPTS is set, then custom types are also supported. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template - simdjson_inline error_code get(T &out) -#if SIMDJSON_SUPPORTS_DESERIALIZATION + simdjson_warn_unused simdjson_inline error_code get(T &out) +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { - #if SIMDJSON_SUPPORTS_DESERIALIZATION + #if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); + } else if constexpr (concepts::optional_type) { + using value_type = typename std::remove_cvref_t::value_type; + + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullopt + return SUCCESS; + } + + if (!out) { + out.emplace(); + } + return get(out.value()); } else { static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " "And you do not seem to have added support for it. Indeed, we have that " @@ -32634,7 +34980,7 @@ class value { static_cast(out); // to get rid of unused errors return UNINITIALIZED; } -#else // SIMDJSON_SUPPORTS_DESERIALIZATION +#else // SIMDJSON_SUPPORTS_CONCEPTS // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -32752,7 +35098,7 @@ class value { * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. */ template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; /** * Cast this JSON value to a "wobbly" string. @@ -32929,12 +35275,14 @@ class value { */ simdjson_inline simdjson_result at(size_t index) noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -32963,7 +35311,8 @@ class value { * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the default because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -33206,6 +35555,14 @@ class value { */ simdjson_inline simdjson_result at_path(std::string_view at_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * Supports wildcard character (*) for arrays or ".*" for objects. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern + */ + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; protected: /** @@ -33273,7 +35630,7 @@ struct simdjson_result : public arm64::implementation_si simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -33302,12 +35659,14 @@ struct simdjson_result : public arm64::implementation_si simdjson_inline simdjson_result end() & noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -33334,7 +35693,8 @@ struct simdjson_result : public arm64::implementation_si * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the defaul because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -33354,7 +35714,22 @@ struct simdjson_result : public arm64::implementation_si simdjson_result operator[](int) noexcept = delete; /** - * Get the type of this JSON value. + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * Given a valid JSON document, the answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just @@ -33378,6 +35753,7 @@ struct simdjson_result : public arm64::implementation_si simdjson_inline simdjson_result current_depth() const noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; }; } // namespace simdjson @@ -33848,14 +36224,14 @@ class json_iterator { * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ - simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + simdjson_warn_unused simdjson_inline error_code report_error(error_code error, const char *message) noexcept; /** * Log error, but don't stop iteration. * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ - simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; + simdjson_warn_unused simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; /** * Take an input in json containing max_len characters and attempt to copy it over to tmpbuf, a buffer with @@ -33875,7 +36251,7 @@ class json_iterator { simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; - simdjson_inline error_code consume_character(char c) noexcept; + simdjson_warn_unused simdjson_inline error_code consume_character(char c) noexcept; #if SIMDJSON_DEVELOPMENT_CHECKS simdjson_inline token_position start_position(depth_t depth) const noexcept; simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; @@ -33966,6 +36342,7 @@ namespace ondemand { * The type of a JSON value. */ enum class json_type { + unknown=0, // Start at 1 to catch uninitialized / default values more easily array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) object, ///< A JSON object ( { "a": 1, "b" 2, ... } ) @@ -34172,6 +36549,12 @@ class raw_json_string { */ simdjson_inline const char * raw() const noexcept; + /** + * Get the character at index i. This is unchecked. + * [0] when the string is of length 0 returns the final quote ("). + */ + simdjson_inline char operator[](size_t i) const noexcept; + /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done) on target.size() characters, @@ -34311,10 +36694,10 @@ struct simdjson_result : public arm64::impleme simdjson_inline ~simdjson_result() noexcept = default; ///< @private simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline char operator[](size_t) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape(arm64::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(arm64::ondemand::json_iterator &iter) const noexcept; }; - } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H @@ -34330,6 +36713,7 @@ struct simdjson_result : public arm64::impleme /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include +#include namespace simdjson { namespace arm64 { @@ -34448,7 +36832,9 @@ class parser { simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; - /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + /** @overload simdjson_result iterate(padded_string_view json) & noexcept + The string instance might be have its capacity extended. Note that this can still + result in AddressSanitizer: container-overflow in some cases. */ simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; @@ -34536,6 +36922,11 @@ class parser { * Setting batch_size to excessively large or excessively small values may impact negatively the * performance. * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what @@ -34543,10 +36934,10 @@ class parser { * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * - * ### Threads + * This is checked automatically with all iterate_many function calls, except for the two + * that take pointers (const char* or const uint8_t*). * - * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the - * hood to do some lookahead. + * ### Threads * * ### Parser Capacity * @@ -34572,14 +36963,16 @@ class parser { */ inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(padded_string_view json, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) + the string might be automatically padded with up to SIMDJSON_PADDING whitespace characters */ + inline simdjson_result iterate_many(std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe - /** @private We do not want to allow implicit conversion from C string to std::string. */ simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; @@ -34681,13 +37074,39 @@ class parser { bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; #endif + /** + * Get a unique parser instance corresponding to the current thread. + * This instance can be safely used within the current thread, but it should + * not be passed to other threads. + * + * A parser should only be used for one document at a time. + * + * Our simdjson::from functions use this parser instance. + * + * You can free the related parser by calling release_parser(). + */ + static simdjson_inline simdjson_warn_unused ondemand::parser& get_parser(); + /** + * Release the parser instance initialized by get_parser() and all the + * associated resources (memory). Returns true if a parser instance + * was released. + */ + static simdjson_inline bool release_parser(); + private: + friend bool release_parser(); + friend ondemand::parser& get_parser(); + /** Get the thread-local parser instance, allocates it if needed */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_parser_instance(); + /** Get the thread-local parser instance, it might be null */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_threadlocal_parser_if_exists(); /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; std::unique_ptr string_buf{}; + #if SIMDJSON_DEVELOPMENT_CHECKS std::unique_ptr start_positions{}; #endif @@ -34715,6 +37134,314 @@ struct simdjson_result : public arm64::implementation_s #endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H /* end file simdjson/generic/ondemand/parser.h for arm64 */ +// JSON builder - needed for extract_into functionality +/* including simdjson/generic/ondemand/json_string_builder.h for arm64: #include "simdjson/generic/ondemand/json_string_builder.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder.h for arm64 */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + + +#if SIMDJSON_SUPPORTS_CONCEPTS + +namespace arm64 { +namespace builder { + class string_builder; +}} + +template +struct has_custom_serialization : std::false_type {}; + +inline constexpr struct serialize_tag { + template + constexpr void operator()(arm64::builder::string_builder& b, T&& obj) const{ + return tag_invoke(*this, b, std::forward(obj)); + } + + +} serialize{}; +template +struct has_custom_serialization(), std::declval())) +>> : std::true_type {}; + +template +constexpr bool require_custom_serialization = has_custom_serialization::value; +#else +struct has_custom_serialization : std::false_type {}; +#endif // SIMDJSON_SUPPORTS_CONCEPTS + +namespace arm64 { +namespace builder { +/** + * A builder for JSON strings representing documents. This is a low-level + * builder that is not meant to be used directly by end-users. Though it + * supports atomic types (Booleans, strings), it does not support composed + * types (arrays and objects). + * + * Ultimately, this class can support kernel-specific optimizations. E.g., + * it may make use of SIMD instructions to escape strings faster. + */ +class string_builder { +public: + simdjson_inline string_builder(size_t initial_capacity = DEFAULT_INITIAL_CAPACITY); + + static constexpr size_t DEFAULT_INITIAL_CAPACITY = 1024; + + /** + * Append number (includes Booleans). Booleans are mapped to the strings + * false and true. Numbers are converted to strings abiding by the JSON standard. + * Floating-point numbers are converted to the shortest string that 'correctly' + * represents the number. + */ + template::value>::type> + simdjson_inline void append(number_type v) noexcept; + + /** + * Append character c. + */ + simdjson_inline void append(char c) noexcept; + + /** + * Append the string 'null'. + */ + simdjson_inline void append_null() noexcept; + + /** + * Clear the content. + */ + simdjson_inline void clear() noexcept; + + /** + * Append the std::string_view, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append(std::string_view input) noexcept; + + /** + * Append the std::string_view surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(std::string_view input) noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + template + simdjson_inline void escape_and_append_with_quotes() noexcept; +#endif + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(char input) noexcept; + + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(const char* input) noexcept; + + /** + * Append the C string directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *c) noexcept; + + /** + * Append "{" to the buffer. + */ + simdjson_inline void start_object() noexcept; + + /** + * Append "}" to the buffer. + */ + simdjson_inline void end_object() noexcept; + + /** + * Append "[" to the buffer. + */ + simdjson_inline void start_array() noexcept; + + /** + * Append "]" to the buffer. + */ + simdjson_inline void end_array() noexcept; + + /** + * Append "," to the buffer. + */ + simdjson_inline void append_comma() noexcept; + + /** + * Append ":" to the buffer. + */ + simdjson_inline void append_colon() noexcept; + + /** + * Append a key-value pair to the buffer. + * The key is escaped and surrounded by double quotes. + * The value is escaped if it is a string. + */ + template + simdjson_inline void append_key_value(key_type key, value_type value) noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + template + simdjson_inline void append_key_value(value_type value) noexcept; + + // Support for optional types (std::optional, etc.) + template + requires(!require_custom_serialization) + simdjson_inline void append(const T &opt); + + template + requires(require_custom_serialization) + simdjson_inline void append(T &&val); + + // Support for string-like types + template + requires(std::is_convertible::value || + std::is_same::value ) + simdjson_inline void append(const T &value); +#endif +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS + // Support for range-based appending (std::ranges::view, etc.) + template +requires (!std::is_convertible::value && !require_custom_serialization) + simdjson_inline void append(const R &range) noexcept; +#endif + /** + * Append the std::string_view directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(std::string_view input) noexcept; + + /** + * Append len characters from str. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *str, size_t len) noexcept; +#if SIMDJSON_EXCEPTIONS + /** + * Creates an std::string from the written JSON buffer. + * Throws if memory allocation failed + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string() const noexcept(false); + + /** + * Creates an std::string_view from the written JSON buffer. + * Throws if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string_view() const noexcept(false) simdjson_lifetime_bound; +#endif + + /** + * Returns a view on the written JSON buffer. Returns an error + * if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result view() const noexcept; + + /** + * Appends the null character to the buffer and returns + * a pointer to the beginning of the written JSON buffer. + * Returns an error if memory allocation failed. + * The result is null-terminated. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result c_str() noexcept; + + /** + * Return true if the content is valid UTF-8. + */ + simdjson_inline bool validate_unicode() const noexcept; + + /** + * Returns the current size of the written JSON buffer. + * If an error occurred, returns 0. + */ + simdjson_inline size_t size() const noexcept; + +private: + /** + * Returns true if we can write at least upcoming_bytes bytes. + * The underlying buffer is reallocated if needed. It is designed + * to be called before writing to the buffer. It should be fast. + */ + simdjson_inline bool capacity_check(size_t upcoming_bytes); + + /** + * Grow the buffer to at least desired_capacity bytes. + * If the allocation fails, is_valid is set to false. We expect + * that this function would not be repeatedly called. + */ + simdjson_inline void grow_buffer(size_t desired_capacity); + + /** + * We use this helper function to make sure that is_valid is kept consistent. + */ + simdjson_inline void set_valid(bool valid) noexcept; + + std::unique_ptr buffer{}; + size_t position{0}; + size_t capacity{0}; + bool is_valid{true}; +}; + + + +} +} + + +#if !SIMDJSON_STATIC_REFLECTION +// fallback implementation until we have static reflection +template +simdjson_warn_unused simdjson_result to_json(const Z &z, size_t initial_capacity = simdjson::arm64::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + simdjson::arm64::builder::string_builder b(initial_capacity); + b.append(z); + std::string_view s; + auto e = b.view().get(s); + if(e) { return e; } + return std::string(s); +} +template +simdjson_warn_unused simdjson_error to_json(const Z &z, std::string &s, size_t initial_capacity = simdjson::arm64::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + simdjson::arm64::builder::string_builder b(initial_capacity); + b.append(z); + std::string_view sv; + auto e = b.view().get(sv); + if(e) { return e; } + s.assign(sv.data(), sv.size()); + return simdjson::SUCCESS; +} +#endif + +#if SIMDJSON_SUPPORTS_CONCEPTS +#endif // SIMDJSON_SUPPORTS_CONCEPTS + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_STRING_BUILDER_H +/* end file simdjson/generic/ondemand/json_string_builder.h for arm64 */ + // All other declarations /* including simdjson/generic/ondemand/array.h for arm64: #include "simdjson/generic/ondemand/array.h" */ /* begin file simdjson/generic/ondemand/array.h for arm64 */ @@ -34725,6 +37452,7 @@ struct simdjson_result : public arm64::implementation_s /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { @@ -34827,7 +37555,7 @@ class array { * JSONPath queries that trivially convertible to JSON Pointer queries: key * names and array indices. * - * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * https://www.rfc-editor.org/rfc/rfc9535 (RFC 9535) * * @return The value associated with the given JSONPath expression, or: * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails @@ -34837,6 +37565,15 @@ class array { */ inline simdjson_result at_path(std::string_view json_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * Supports wildcard patterns like "[*]" to match all array elements. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern + */ + inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; + /** * Consumes the array and returns a string_view instance corresponding to the * array as represented in JSON. It points inside the original document. @@ -34851,11 +37588,42 @@ class array { * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) noexcept; + +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this array as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON array is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_warn_unused simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the array, no matter where you are right now. */ - simdjson_inline error_code consume() noexcept; + simdjson_warn_unused simdjson_inline error_code consume() noexcept; /** * Begin array iteration. @@ -34928,8 +37696,30 @@ struct simdjson_result : public arm64::implementation_si simdjson_inline simdjson_result at(size_t index) noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_warn_unused simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -34974,7 +37764,8 @@ class array_iterator { * * Part of the std::iterator interface. */ - simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result + operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. /** * Check if we are at the end of the JSON. * @@ -34998,6 +37789,11 @@ class array_iterator { */ simdjson_inline array_iterator &operator++() noexcept; + /** + * Check if the array is at the end. + */ + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; + private: value_iterator iter{}; @@ -35016,7 +37812,6 @@ namespace simdjson { template<> struct simdjson_result : public arm64::implementation_simdjson_result_base { -public: simdjson_inline simdjson_result(arm64::ondemand::array_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -35029,6 +37824,8 @@ struct simdjson_result : public arm64::implemen simdjson_inline bool operator==(const simdjson_result &) const noexcept; simdjson_inline bool operator!=(const simdjson_result &) const noexcept; simdjson_inline simdjson_result &operator++() noexcept; + + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; }; } // namespace simdjson @@ -35045,6 +37842,7 @@ struct simdjson_result : public arm64::implemen /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ @@ -35156,7 +37954,7 @@ class document { * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. */ template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; /** * Cast this JSON value to a string. * @@ -35222,7 +38020,7 @@ class document { */ template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -35245,7 +38043,7 @@ class document { */ template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -35263,18 +38061,18 @@ class document { * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. - * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns INCORRECT_TYPE If the JSON value is of the given type. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template - simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION + simdjson_warn_unused simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { @@ -35286,7 +38084,7 @@ class document { static_cast(out); // to get rid of unused errors return UNINITIALIZED; } -#else // SIMDJSON_SUPPORTS_DESERIALIZATION +#else // SIMDJSON_SUPPORTS_CONCEPTS // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -35296,7 +38094,7 @@ class document { " You may also add support for custom types, see our documentation."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS } /** @overload template error_code get(T &out) & noexcept */ @@ -35361,7 +38159,7 @@ class document { * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a raw_json_string. * @@ -35370,7 +38168,7 @@ class document { * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a bool. * @@ -35441,12 +38239,14 @@ class document { simdjson_inline simdjson_result end() & noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -35485,7 +38285,8 @@ class document { * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the default because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -35520,11 +38321,27 @@ class document { * E.g., you must still call "is_null()" to check that a value is null even if * "type()" returns json_type::null. * + * The answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. + * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). * - * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + * Prior to simdjson 4.0, this function would return an error given a bad token. + * Starting with simdjson 4.0, it will return simdjson::ondemand::json_type::unknown. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. */ simdjson_inline simdjson_result type() noexcept; @@ -35724,7 +38541,7 @@ class document { * JSONPath queries that trivially convertible to JSON Pointer queries: key * names and array indices. * - * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * https://www.rfc-editor.org/rfc/rfc9535 (RFC 9535) * * Key values are matched exactly, without unescaping or Unicode normalization. * We do a byte-by-byte comparison. E.g. @@ -35742,17 +38559,64 @@ class document { */ simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * + * Supports wildcard patterns like "$.array[*]" or "$.object.*" to match multiple elements. + * + * This method materializes all matching values into a vector. + * The document will be consumed after this call. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern, or: + * - INVALID_JSON_POINTER if the JSONPath cannot be parsed + * - NO_SUCH_FIELD if a field does not exist + * - INDEX_OUT_OF_BOUNDS if an array index is out of bounds + * - INCORRECT_TYPE if path traversal encounters wrong type + */ + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; + /** * Consumes the document and returns a string_view instance corresponding to the * document as represented in JSON. It points inside the original byte array containing * the JSON document. */ simdjson_inline simdjson_result raw_json() noexcept; + +#if SIMDJSON_STATIC_REFLECTION + /** + * Extract only specific fields from the JSON object into a struct. + * + * This allows selective deserialization of only the fields you need, + * potentially improving performance by skipping unwanted fields. + * + * Example: + * ```cpp + * struct Car { + * std::string make; + * std::string model; + * int year; + * double price; + * }; + * + * Car car; + * doc.extract_into<"make", "model">(car); + * // Only 'make' and 'model' fields are extracted from JSON + * ``` + * + * @tparam FieldNames Compile-time string literals specifying which fields to extract + * @param out The output struct to populate with selected fields + * @returns SUCCESS on success, or an error code if a required field is missing or has wrong type + */ + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION protected: /** * Consumes the document. */ - simdjson_inline error_code consume() noexcept; + simdjson_warn_unused simdjson_inline error_code consume() noexcept; simdjson_inline document(ondemand::json_iterator &&iter) noexcept; simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; @@ -35805,7 +38669,7 @@ class document_reference { simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -35814,7 +38678,7 @@ class document_reference { simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -35827,7 +38691,7 @@ class document_reference { } template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -35849,14 +38713,14 @@ class document_reference { * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template - simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION + simdjson_warn_unused simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { @@ -35868,7 +38732,7 @@ class document_reference { static_cast(out); // to get rid of unused errors return UNINITIALIZED; } -#else // SIMDJSON_SUPPORTS_DESERIALIZATION +#else // SIMDJSON_SUPPORTS_CONCEPTS // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -35878,12 +38742,17 @@ class document_reference { " You may also add support for custom types, see our documentation."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS } /** @overload template error_code get(T &out) & noexcept */ template simdjson_inline error_code get(T &out) && noexcept; simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_STATIC_REFLECTION + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION simdjson_inline operator document&() const noexcept; #if SIMDJSON_EXCEPTIONS template @@ -35924,6 +38793,7 @@ class document_reference { simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; private: document *doc{nullptr}; @@ -35952,7 +38822,7 @@ struct simdjson_result : public arm64::implementation simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -35965,6 +38835,9 @@ struct simdjson_result : public arm64::implementation template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS + + using arm64::implementation_simdjson_result_base::operator*; + using arm64::implementation_simdjson_result_base::operator->; template ::value == false>::type> explicit simdjson_inline operator T() noexcept(false); simdjson_inline operator arm64::ondemand::array() & noexcept(false); @@ -36004,6 +38877,12 @@ struct simdjson_result : public arm64::implementation simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; +#if SIMDJSON_STATIC_REFLECTION + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION }; @@ -36030,7 +38909,7 @@ struct simdjson_result : public arm64::impl simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -36081,6 +38960,12 @@ struct simdjson_result : public arm64::impl simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; +#if SIMDJSON_STATIC_REFLECTION + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION }; @@ -36173,7 +39058,7 @@ class document_stream { /** * Construct an uninitialized document_stream. * - * ```c++ + * ```cpp * document_stream docs; * auto error = parser.iterate_many(json).get(docs); * ``` @@ -36223,6 +39108,7 @@ class document_stream { * Default constructor. */ simdjson_inline iterator() noexcept; + simdjson_inline iterator(const iterator &other) noexcept = default; /** * Get the current document (or error). */ @@ -36236,6 +39122,7 @@ class document_stream { * @param other the end iterator to compare to. */ simdjson_inline bool operator!=(const iterator &other) const noexcept; + simdjson_inline bool operator==(const iterator &other) const noexcept; /** * @private * @@ -36279,6 +39166,11 @@ class document_stream { */ inline error_code error() const noexcept; + /** + * Returns whether the iterator is at the end. + */ + inline bool at_end() const noexcept; + private: simdjson_inline iterator(document_stream *s, bool finished) noexcept; /** The document_stream we're iterating through. */ @@ -36290,6 +39182,7 @@ class document_stream { friend class document_stream; friend class json_iterator; }; + using iterator = document_stream::iterator; /** * Start iterating the documents in the stream. @@ -36553,6 +39446,10 @@ struct simdjson_result : public arm64::implementation_si /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION && SIMDJSON_SUPPORTS_CONCEPTS */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_string_builder.h" // for constevalutil::fixed_string */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { @@ -36574,12 +39471,14 @@ class object { simdjson_inline simdjson_result begin() noexcept; simdjson_inline simdjson_result end() noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -36622,7 +39521,8 @@ class object { * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the default because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -36705,6 +39605,15 @@ class object { */ inline simdjson_result at_path(std::string_view json_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * Supports wildcard patterns like ".*" to match all object fields. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern + */ + inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; + /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you @@ -36750,11 +39659,71 @@ class object { */ simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this object as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON object is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_warn_unused simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } + +#if SIMDJSON_STATIC_REFLECTION + /** + * Extract only specific fields from the JSON object into a struct. + * + * This allows selective deserialization of only the fields you need, + * potentially improving performance by skipping unwanted fields. + * + * Example: + * ```cpp + * struct Car { + * std::string make; + * std::string model; + * int year; + * double price; + * }; + * + * Car car; + * object.extract_into<"make", "model">(car); + * // Only 'make' and 'model' fields are extracted from JSON + * ``` + * + * @tparam FieldNames Compile-time string literals specifying which fields to extract + * @param out The output struct to populate with selected fields + * @returns SUCCESS on success, or an error code if a required field is missing or has wrong type + */ + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the object, no matter where you are right now. */ - simdjson_inline error_code consume() noexcept; + simdjson_warn_unused simdjson_inline error_code consume() noexcept; static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; @@ -36793,12 +39762,43 @@ struct simdjson_result : public arm64::implementation_s simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; - + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; inline simdjson_result reset() noexcept; inline simdjson_result is_empty() noexcept; inline simdjson_result count_fields() & noexcept; inline simdjson_result raw_json() noexcept; + #if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_warn_unused simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#if SIMDJSON_STATIC_REFLECTION + // TODO: move this code into object-inl.h + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) noexcept { + if (error()) { return error(); } + return first.extract_into(out); + } +#endif // SIMDJSON_STATIC_REFLECTION +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -36927,6 +39927,20 @@ inline simdjson_result to_json_string(simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); + +#if SIMDJSON_STATIC_REFLECTION +/** + * Create a JSON string from any user-defined type using static reflection. + * Only available when SIMDJSON_STATIC_REFLECTION is enabled. + */ +template + requires(!std::same_as && + !std::same_as && + !std::same_as && + !std::same_as) +inline std::string to_json_string(const T& obj); +#endif + } // namespace simdjson /** @@ -36998,28 +40012,30 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result #include +#if SIMDJSON_STATIC_REFLECTION +#include +// #include // for std::define_static_string - header not available yet +#endif namespace simdjson { -template -constexpr bool require_custom_serialization = false; ////////////////////////////// // Number deserialization ////////////////////////////// template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { using limits = std::numeric_limits; @@ -37033,7 +40049,6 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { } template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { double x; SIMDJSON_TRY(val.get_double().get(x)); @@ -37042,7 +40057,6 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { } template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { using limits = std::numeric_limits; @@ -37055,8 +40069,23 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { return SUCCESS; } +////////////////////////////// +// String deserialization +////////////////////////////// + +// just a character! +error_code tag_invoke(deserialize_tag, auto &val, char &out) noexcept { + std::string_view x; + SIMDJSON_TRY(val.get_string().get(x)); + if(x.size() != 1) { + return INCORRECT_TYPE; + } + out = x[0]; + return SUCCESS; +} + +// any string-like type (can be constructed from std::string_view) template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { std::string_view str; SIMDJSON_TRY(val.get_string().get(str)); @@ -37073,7 +40102,6 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothr * doc.get>(). */ template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { using value_type = typename std::remove_cvref_t::value_type; static_assert( @@ -37082,9 +40110,13 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { static_assert( std::is_default_constructible_v, "The specified type inside the container must default constructible."); - arm64::ondemand::array arr; - SIMDJSON_TRY(val.get_array().get(arr)); + if constexpr (std::is_same_v, arm64::ondemand::array>) { + arr = val; + } else { + SIMDJSON_TRY(val.get_array().get(arr)); + } + for (auto v : arr) { if constexpr (concepts::returns_reference) { if (auto const err = v.get().get(concepts::emplace_one(out)); @@ -37115,7 +40147,6 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { * string-keyed types. */ template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { using value_type = typename std::remove_cvref_t::mapped_type; static_assert( @@ -37141,7 +40172,45 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { return SUCCESS; } +template +error_code tag_invoke(deserialize_tag, arm64::ondemand::object &obj, T &out) noexcept { + using value_type = typename std::remove_cvref_t::mapped_type; + + out.clear(); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + arm64::ondemand::value value_obj; + SIMDJSON_TRY(field.value().get(value_obj)); + + value_type this_value; + SIMDJSON_TRY(value_obj.get(this_value)); + out.emplace(typename T::key_type(key), std::move(this_value)); + } + return SUCCESS; +} + +template +error_code tag_invoke(deserialize_tag, arm64::ondemand::value &val, T &out) noexcept { + arm64::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + +template +error_code tag_invoke(deserialize_tag, arm64::ondemand::document &doc, T &out) noexcept { + arm64::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + +template +error_code tag_invoke(deserialize_tag, arm64::ondemand::document_reference &doc, T &out) noexcept { + arm64::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} /** @@ -37159,7 +40228,6 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { * @return status of the conversion */ template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::element_type, ValT>) { using element_type = typename std::remove_cvref_t::element_type; @@ -37184,17 +40252,17 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser /** * This CPO (Customization Point Object) will help deserialize into optional types. */ -template - requires(!require_custom_serialization) -error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::value_type, ValT>) { +template +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept(nothrow_deserializable::value_type, decltype(val)>) { using value_type = typename std::remove_cvref_t::value_type; - static_assert( - deserializable, - "The specified type inside the unique_ptr must itself be deserializable"); - static_assert( - std::is_default_constructible_v, - "The specified type inside the unique_ptr must default constructible."); + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullopt + return SUCCESS; + } if (!out) { out.emplace(); @@ -37203,10 +40271,329 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser return SUCCESS; } + +#if SIMDJSON_STATIC_REFLECTION + + +template +constexpr bool user_defined_type = (std::is_class_v +&& !std::is_same_v && !std::is_same_v && !concepts::optional_type && +!concepts::appendable_containers); + + +template + requires(user_defined_type && std::is_class_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { + arm64::ondemand::object obj; + if constexpr (std::is_same_v, arm64::ondemand::object>) { + obj = val; + } else { + SIMDJSON_TRY(val.get_object().get(obj)); + } + template for (constexpr auto mem : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + if constexpr (concepts::optional_type) { + // for optional members, it's ok if the key is missing + auto error = obj[key].get(out.[:mem:]); + if (error && error != NO_SUCH_FIELD) { + if(error == NO_SUCH_FIELD) { + out.[:mem:].reset(); + continue; + } + return error; + } + } else { + // for non-optional members, the key must be present + SIMDJSON_TRY(obj[key].get(out.[:mem:])); + } + } + }; + return simdjson::SUCCESS; +} + +// Support for enum deserialization - deserialize from string representation using expand approach from P2996R12 +template + requires(std::is_enum_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { +#if SIMDJSON_STATIC_REFLECTION + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + if (str == std::meta::identifier_of(enum_val)) { + out = [:enum_val:]; + return SUCCESS; + } + }; + + return INCORRECT_TYPE; +#else + // Fallback: deserialize as integer if reflection not available + std::underlying_type_t int_val; + SIMDJSON_TRY(val.get(int_val)); + out = static_cast(int_val); + return SUCCESS; +#endif +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::unique_ptr &out) noexcept { + if (!out) { + out = std::make_unique(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::shared_ptr &out) noexcept { + if (!out) { + out = std::make_shared(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +#endif // SIMDJSON_STATIC_REFLECTION + +//////////////////////////////////////// +// Unique pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Shared pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Explicit optional specializations +//////////////////////////////////////// + +//////////////////////////////////////// +// Explicit smart pointer specializations for string and int types +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_shared(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + int64_t temp; + SIMDJSON_TRY(val.get_int64().get(temp)); + *out = static_cast(temp); + return SUCCESS; +} + } // namespace simdjson #endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS /* end file simdjson/generic/ondemand/std_deserialize.h for arm64 */ // Inline definitions @@ -37299,7 +40686,7 @@ simdjson_inline simdjson_result array::begin() noexcept { simdjson_inline simdjson_result array::end() noexcept { return array_iterator(iter); } -simdjson_inline error_code array::consume() noexcept { +simdjson_warn_unused simdjson_warn_unused simdjson_inline error_code array::consume() noexcept { auto error = iter.json_iter().skip_child(iter.depth()-1); if(error) { iter.abandon(); } return error; @@ -37384,6 +40771,67 @@ inline simdjson_result array::at_path(std::string_view json_path) noexcep return at_pointer(json_pointer); } +inline simdjson_result> array::at_path_with_wildcard(std::string_view json_path) noexcept { + std::vector result; + + auto result_pair = get_next_key_and_json_path(json_path); + std::string_view key = result_pair.first; + std::string_view remaining_path = result_pair.second; + // Wildcard case + if(key=="*"){ + for(auto element: *this){ + + if(element.error()){ + return element.error(); + } + + if(remaining_path.empty()){ + // Use value_unsafe() because we've already checked for errors above. + // The 'element' is a simdjson_result wrapper, and we need to extract + // the underlying value. value_unsafe() is safe here because error() returned false. + result.push_back(std::move(element).value_unsafe()); + + }else{ + auto nested_result = element.at_path_with_wildcard(remaining_path); + + if(nested_result.error()){ + return nested_result.error(); + } + // Same logic as above. + std::vector nested_matches = std::move(nested_result).value_unsafe(); + + result.insert(result.end(), + std::make_move_iterator(nested_matches.begin()), + std::make_move_iterator(nested_matches.end())); + } + } + return result; + }else{ + // Specific index case in which we access the element at the given index + size_t idx=0; + + for(char c:key){ + if(c < '0' || c > '9'){ + return INVALID_JSON_POINTER; + } + idx = idx*10 + (c - '0'); + } + + auto element = at(idx); + + if(element.error()){ + return element.error(); + } + + if(remaining_path.empty()){ + result.push_back(std::move(element).value_unsafe()); + return result; + }else{ + return element.at_path_with_wildcard(remaining_path); + } + } +} + simdjson_inline simdjson_result array::at(size_t index) noexcept { size_t i = 0; for (auto value : *this) { @@ -37442,6 +40890,10 @@ simdjson_inline simdjson_result simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path_with_wildcard(json_path); +} simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { if (error()) { return error(); } return first.raw_json(); @@ -37490,6 +40942,9 @@ simdjson_inline array_iterator &array_iterator::operator++() noexcept { return *this; } +simdjson_inline bool array_iterator::at_end() const noexcept { + return iter.at_end(); +} } // namespace ondemand } // namespace arm64 } // namespace simdjson @@ -37526,7 +40981,9 @@ simdjson_inline simdjson_result &simdjson_resul ++(first); return *this; } - +simdjson_inline bool simdjson_result::at_end() const noexcept { + return !first.iter.is_valid() || first.at_end(); +} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H @@ -37583,7 +41040,7 @@ simdjson_inline simdjson_result value::get_string(bool allow_r return iter.get_string(allow_replacement); } template -simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_warn_unused simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { return iter.get_string(receiver, allow_replacement); } simdjson_inline simdjson_result value::get_wobbly_string() noexcept { @@ -37625,15 +41082,15 @@ template<> simdjson_inline simdjson_result value::get() noexcept { retu template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } -template<> simdjson_inline error_code value::get(array& out) noexcept { return get_array().get(out); } -template<> simdjson_inline error_code value::get(object& out) noexcept { return get_object().get(out); } -template<> simdjson_inline error_code value::get(raw_json_string& out) noexcept { return get_raw_json_string().get(out); } -template<> simdjson_inline error_code value::get(std::string_view& out) noexcept { return get_string(false).get(out); } -template<> simdjson_inline error_code value::get(number& out) noexcept { return get_number().get(out); } -template<> simdjson_inline error_code value::get(double& out) noexcept { return get_double().get(out); } -template<> simdjson_inline error_code value::get(uint64_t& out) noexcept { return get_uint64().get(out); } -template<> simdjson_inline error_code value::get(int64_t& out) noexcept { return get_int64().get(out); } -template<> simdjson_inline error_code value::get(bool& out) noexcept { return get_bool().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(array& out) noexcept { return get_array().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(object& out) noexcept { return get_object().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(raw_json_string& out) noexcept { return get_raw_json_string().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(std::string_view& out) noexcept { return get_string(false).get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(number& out) noexcept { return get_number().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(double& out) noexcept { return get_double().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(uint64_t& out) noexcept { return get_uint64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(int64_t& out) noexcept { return get_int64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(bool& out) noexcept { return get_bool().get(out); } #if SIMDJSON_EXCEPTIONS template @@ -37831,6 +41288,19 @@ simdjson_inline simdjson_result value::at_path(std::string_view json_path } } +inline simdjson_result> value::at_path_with_wildcard(std::string_view json_path) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path_with_wildcard(json_path); + case json_type::object: + return (*this).get_object().at_path_with_wildcard(json_path); + default: + return INVALID_JSON_POINTER; + } +} + } // namespace ondemand } // namespace arm64 } // namespace simdjson @@ -38081,6 +41551,14 @@ simdjson_inline simdjson_result simdjson_result> simdjson_result::at_path_with_wildcard( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path_with_wildcard(json_path); +} + } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H @@ -38230,7 +41708,7 @@ simdjson_inline simdjson_result document::get_string(bool allo return get_root_value_iterator().get_root_string(true, allow_replacement); } template -simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_warn_unused simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept { return get_root_value_iterator().get_root_string(receiver, true, allow_replacement); } simdjson_inline simdjson_result document::get_wobbly_string() noexcept { @@ -38256,15 +41734,15 @@ template<> simdjson_inline simdjson_result document::get() & noexcept { template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } -template<> simdjson_inline error_code document::get(array& out) & noexcept { return get_array().get(out); } -template<> simdjson_inline error_code document::get(object& out) & noexcept { return get_object().get(out); } -template<> simdjson_inline error_code document::get(raw_json_string& out) & noexcept { return get_raw_json_string().get(out); } -template<> simdjson_inline error_code document::get(std::string_view& out) & noexcept { return get_string(false).get(out); } -template<> simdjson_inline error_code document::get(double& out) & noexcept { return get_double().get(out); } -template<> simdjson_inline error_code document::get(uint64_t& out) & noexcept { return get_uint64().get(out); } -template<> simdjson_inline error_code document::get(int64_t& out) & noexcept { return get_int64().get(out); } -template<> simdjson_inline error_code document::get(bool& out) & noexcept { return get_bool().get(out); } -template<> simdjson_inline error_code document::get(value& out) & noexcept { return get_value().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(array& out) & noexcept { return get_array().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(object& out) & noexcept { return get_object().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(raw_json_string& out) & noexcept { return get_raw_json_string().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(std::string_view& out) & noexcept { return get_string(false).get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(double& out) & noexcept { return get_double().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(uint64_t& out) & noexcept { return get_uint64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(int64_t& out) & noexcept { return get_int64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(bool& out) & noexcept { return get_bool().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(value& out) & noexcept { return get_value().get(out); } template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } @@ -38284,8 +41762,8 @@ simdjson_inline document::operator object() & noexcept(false) { return get_objec simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline document::operator double() noexcept(false) { return get_double(); } -simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } -simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator std::string_view() noexcept(false) simdjson_lifetime_bound { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) simdjson_lifetime_bound { return get_raw_json_string(); } simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } simdjson_inline document::operator value() noexcept(false) { return get_value(); } @@ -38334,7 +41812,7 @@ simdjson_inline simdjson_result document::operator[](const char *key) & n return start_or_resume_object()[key]; } -simdjson_inline error_code document::consume() noexcept { +simdjson_warn_unused simdjson_inline error_code document::consume() noexcept { bool scalar = false; auto error = is_scalar().get(scalar); if(error) { return error; } @@ -38436,6 +41914,69 @@ simdjson_inline simdjson_result document::at_path(std::string_view json_p } } +simdjson_inline simdjson_result> document::at_path_with_wildcard(std::string_view json_path) noexcept { + rewind(); // Rewind the document each time at_path_with_wildcard is called + if (json_path.empty()) { + return INVALID_JSON_POINTER; + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path_with_wildcard(json_path); + case json_type::object: + return (*this).get_object().at_path_with_wildcard(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code document::extract_into(T& out) & noexcept { + // Helper to check if a field name matches any of the requested fields + auto should_extract = [](std::string_view field_name) constexpr -> bool { + return ((FieldNames.view() == field_name) || ...); + }; + + // Iterate through all members of T using reflection + template for (constexpr auto mem : std::define_static_array( + std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + + // Only extract this field if it's in our list of requested fields + if constexpr (should_extract(key)) { + // Try to find and extract the field + if constexpr (concepts::optional_type) { + // For optional fields, it's ok if they're missing + auto field_result = find_field_unordered(key); + if (!field_result.error()) { + auto error = field_result.get(out.[:mem:]); + if (error && error != NO_SUCH_FIELD) { + return error; + } + } else if (field_result.error() != NO_SUCH_FIELD) { + return field_result.error(); + } else { + out.[:mem:].reset(); + } + } else { + // For required fields (in the requested list), fail if missing + SIMDJSON_TRY((*this)[key].get(out.[:mem:])); + } + } + } + }; + + return SUCCESS; +} + +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + } // namespace ondemand } // namespace arm64 } // namespace simdjson @@ -38543,7 +42084,7 @@ simdjson_inline simdjson_result simdjson_result -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_warn_unused simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } @@ -38579,12 +42120,12 @@ simdjson_deprecated simdjson_inline simdjson_result simdjson_result(first).get(); } template -simdjson_inline error_code simdjson_result::get(T &out) & noexcept { +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) & noexcept { if (error()) { return error(); } return first.get(out); } template -simdjson_inline error_code simdjson_result::get(T &out) && noexcept { +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) && noexcept { if (error()) { return error(); } return std::forward(first).get(out); } @@ -38594,8 +42135,8 @@ template<> simdjson_deprecated simdjson_inline simdjson_result(first); } -template<> simdjson_inline error_code simdjson_result::get(arm64::ondemand::document &out) & noexcept = delete; -template<> simdjson_inline error_code simdjson_result::get(arm64::ondemand::document &out) && noexcept { +template<> simdjson_warn_unused simdjson_inline error_code simdjson_result::get(arm64::ondemand::document &out) & noexcept = delete; +template<> simdjson_warn_unused simdjson_inline error_code simdjson_result::get(arm64::ondemand::document &out) && noexcept { if (error()) { return error(); } out = std::forward(first); return SUCCESS; @@ -38713,6 +42254,20 @@ simdjson_inline simdjson_result simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path_with_wildcard(json_path); +} + +#if SIMDJSON_STATIC_REFLECTION +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code simdjson_result::extract_into(T& out) & noexcept { + if (error()) { return error(); } + return first.extract_into(out); +} +#endif // SIMDJSON_STATIC_REFLECTION + } // namespace simdjson @@ -38747,7 +42302,7 @@ simdjson_inline simdjson_result document_reference::get_double() noexcep simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } template -simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } +simdjson_warn_unused simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } @@ -38798,9 +42353,16 @@ simdjson_inline simdjson_result document_reference::get_number() noexcep simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } simdjson_inline simdjson_result document_reference::at_path(std::string_view json_path) noexcept { return doc->at_path(json_path); } +simdjson_inline simdjson_result> document_reference::at_path_with_wildcard(std::string_view json_path) noexcept { return doc->at_path_with_wildcard(json_path); } simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} simdjson_inline document_reference::operator document&() const noexcept { return *doc; } - +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code document_reference::extract_into(T& out) & noexcept { + return doc->extract_into(out); +} +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION } // namespace ondemand } // namespace arm64 } // namespace simdjson @@ -38897,7 +42459,7 @@ simdjson_inline simdjson_result simdjson_result -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_warn_unused simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } @@ -38932,12 +42494,12 @@ simdjson_inline simdjson_result simdjson_result(first).get(); } template -simdjson_inline error_code simdjson_result::get(T &out) & noexcept { +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) & noexcept { if (error()) { return error(); } return first.get(out); } template -simdjson_inline error_code simdjson_result::get(T &out) && noexcept { +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) && noexcept { if (error()) { return error(); } return std::forward(first).get(out); } @@ -38954,13 +42516,13 @@ simdjson_inline simdjson_result simdjson_result -simdjson_inline error_code simdjson_result::get(arm64::ondemand::document_reference &out) & noexcept { +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(arm64::ondemand::document_reference &out) & noexcept { if (error()) { return error(); } out = first; return SUCCESS; } template <> -simdjson_inline error_code simdjson_result::get(arm64::ondemand::document_reference &out) && noexcept { +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(arm64::ondemand::document_reference &out) && noexcept { if (error()) { return error(); } out = first; return SUCCESS; @@ -39048,7 +42610,20 @@ simdjson_inline simdjson_result simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path_with_wildcard(json_path); +} +#if SIMDJSON_STATIC_REFLECTION +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code simdjson_result::extract_into(T& out) & noexcept { + if (error()) { return error(); } + return first.extract_into(out); +} +#endif // SIMDJSON_STATIC_REFLECTION } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H @@ -39239,10 +42814,19 @@ simdjson_inline document_stream::iterator& document_stream::iterator::operator++ return *this; } +simdjson_inline bool document_stream::iterator::at_end() const noexcept { + return finished; +} + + simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { return finished != other.finished; } +simdjson_inline bool document_stream::iterator::operator==(const document_stream::iterator &other) const noexcept { + return finished == other.finished; +} + simdjson_inline document_stream::iterator document_stream::begin() noexcept { start(); // If there are no documents, we're finished. @@ -39360,7 +42944,10 @@ inline void document_stream::next_document() noexcept { // Always set depth=1 at the start of document doc.iter._depth = 1; // consume comma if comma separated is allowed - if (allow_comma_separated) { doc.iter.consume_character(','); } + if (allow_comma_separated) { + error_code ignored = doc.iter.consume_character(','); + static_cast(ignored); // ignored on purpose + } // Resets the string buffer at the beginning, thus invalidating the strings. doc.iter._string_buf_loc = parser->string_buf.get(); doc.iter._root = doc.iter.position(); @@ -39606,7 +43193,7 @@ simdjson_inline simdjson_result simdjson_result -simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { +simdjson_warn_unused simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.unescaped_key(receiver, allow_replacement); } @@ -39842,6 +43429,8 @@ simdjson_inline void json_iterator::assert_valid_position(token_position positio #ifndef SIMDJSON_CLANG_VISUAL_STUDIO SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#else + (void)position; // Suppress unused parameter warning #endif } @@ -39966,7 +43555,7 @@ simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { return _string_buf_loc; } -simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { +simdjson_warn_unused simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); logger::log_error(*this, message); error = _error; @@ -40010,7 +43599,7 @@ simdjson_inline void json_iterator::reenter_child(token_position position, depth _depth = child_depth; } -simdjson_inline error_code json_iterator::consume_character(char c) noexcept { +simdjson_warn_unused simdjson_inline error_code json_iterator::consume_character(char c) noexcept { if (*peek() == c) { return_current_and_advance(); return SUCCESS; @@ -40033,7 +43622,7 @@ simdjson_inline void json_iterator::set_start_position(depth_t depth, token_posi #endif -simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { +simdjson_warn_unused simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); logger::log_error(*this, message); return _error; @@ -40428,6 +44017,11 @@ inline void log_line(const json_iterator &iter, token_position index, depth_t de /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/jsonpathutil.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_string_builder.h" // for constevalutil::fixed_string */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { @@ -40485,7 +44079,7 @@ simdjson_inline simdjson_result object::start_root(value_iterator &iter) SIMDJSON_TRY( iter.start_root_object().error() ); return object(iter); } -simdjson_inline error_code object::consume() noexcept { +simdjson_warn_unused simdjson_inline error_code object::consume() noexcept { if(iter.is_at_key()) { /** * whenever you are pointing at a key, calling skip_child() is @@ -40591,6 +44185,50 @@ inline simdjson_result object::at_path(std::string_view json_path) noexce return at_pointer(json_pointer); } +inline simdjson_result> object::at_path_with_wildcard(std::string_view json_path) noexcept { + std::vector result; + + auto result_pair = get_next_key_and_json_path(json_path); + std::string_view key = result_pair.first; + std::string_view remaining_path = result_pair.second; + // Handle when its the case for wildcard + if (key == "*") { + // Loop through each field in the object + for (auto field : *this) { + value val; + SIMDJSON_TRY(field.value().get(val)); + + if (remaining_path.empty()) { + result.push_back(std::move(val)); + } else { + auto nested_result = val.at_path_with_wildcard(remaining_path); + + if (nested_result.error()) { + return nested_result.error(); + } + // Extract and append all nested matches to our result + std::vector nested_vec; + SIMDJSON_TRY(std::move(nested_result).get(nested_vec)); + + result.insert(result.end(), + std::make_move_iterator(nested_vec.begin()), + std::make_move_iterator(nested_vec.end())); + } + } + return result; + } else { + value val; + SIMDJSON_TRY(find_field(key).get(val)); + + if (remaining_path.empty()) { + result.push_back(std::move(val)); + return result; + } else { + return val.at_path_with_wildcard(remaining_path); + } + } +} + simdjson_inline simdjson_result object::count_fields() & noexcept { size_t count{0}; // Important: we do not consume any of the values. @@ -40614,6 +44252,52 @@ simdjson_inline simdjson_result object::reset() & noexcept { return iter.reset_object(); } +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code object::extract_into(T& out) & noexcept { + // Helper to check if a field name matches any of the requested fields + auto should_extract = [](std::string_view field_name) constexpr -> bool { + return ((FieldNames.view() == field_name) || ...); + }; + + // Iterate through all members of T using reflection + template for (constexpr auto mem : std::define_static_array( + std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + + // Only extract this field if it's in our list of requested fields + if constexpr (should_extract(key)) { + // Try to find and extract the field + if constexpr (concepts::optional_type) { + // For optional fields, it's ok if they're missing + auto field_result = find_field_unordered(key); + if (!field_result.error()) { + auto error = field_result.get(out.[:mem:]); + if (error && error != NO_SUCH_FIELD) { + return error; + } + } else if (field_result.error() != NO_SUCH_FIELD) { + return field_result.error(); + } else { + out.[:mem:].reset(); + } + } else { + // For required fields (in the requested list), fail if missing + SIMDJSON_TRY((*this)[key].get(out.[:mem:])); + } + } + } + }; + + return SUCCESS; +} + +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + } // namespace ondemand } // namespace arm64 } // namespace simdjson @@ -40671,6 +44355,11 @@ simdjson_inline simdjson_result simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path_with_wildcard(json_path); +} + inline simdjson_result simdjson_result::reset() noexcept { if (error()) { return error(); } return first.reset(); @@ -40690,6 +44379,7 @@ simdjson_inline simdjson_result simdjson_result parser::iterate(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); @@ -40904,7 +44594,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(p #ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); @@ -40936,10 +44626,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(s } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { - if(json.capacity() - json.size() < SIMDJSON_PADDING) { - json.reserve(json.size() + SIMDJSON_PADDING); - } - return iterate(padded_string_view(json)); + return iterate(pad_with_reserve(json)); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { @@ -40961,7 +44648,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(c } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); @@ -40976,6 +44663,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iter } inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { buf += 3; @@ -40984,16 +44672,24 @@ inline simdjson_result parser::iterate_many(const uint8_t *buf, if(allow_comma_separated && batch_size < len) { batch_size = len; } return document_stream(*this, buf, len, batch_size, allow_comma_separated); } + inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); } -inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { +inline simdjson_result parser::iterate_many(padded_string_view s, size_t batch_size, bool allow_comma_separated) noexcept { + if (!s.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(pad(s), batch_size, allow_comma_separated); } - simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { return _capacity; } @@ -41028,6 +44724,34 @@ simdjson_inline simdjson_warn_unused simdjson_result parser::u return result; } +simdjson_inline simdjson_warn_unused ondemand::parser& parser::get_parser() { + return *parser::get_parser_instance(); +} + +simdjson_inline bool release_parser() { + auto &parser_instance = parser::get_threadlocal_parser_if_exists(); + if (parser_instance) { + parser_instance.reset(); + return true; + } + return false; +} + +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_parser_instance() { + std::unique_ptr& parser_instance = get_threadlocal_parser_if_exists(); + if (!parser_instance) { + parser_instance.reset(new ondemand::parser()); + } + return parser_instance; +} + +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_threadlocal_parser_if_exists() { + // @the-moisrex points out that this could be implemented with std::optional (C++17). + thread_local std::unique_ptr parser_instance = nullptr; + return parser_instance; +} + + } // namespace ondemand } // namespace arm64 } // namespace simdjson @@ -41062,8 +44786,13 @@ namespace ondemand { simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} -simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } +simdjson_inline const char * raw_json_string::raw() const noexcept { + return reinterpret_cast(buf); +} +simdjson_inline char raw_json_string::operator[](size_t i) const noexcept { + return reinterpret_cast(buf)[i]; +} simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { size_t pos{0}; @@ -41240,6 +44969,10 @@ simdjson_inline simdjson_result simdjson_result::operator[](size_t i) const noexcept { + if (error()) { return error(); } + return first[i]; +} simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(arm64::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { if (error()) { return error(); } return first.unescape(iter, allow_replacement); @@ -41265,6 +44998,9 @@ simdjson_inline simdjson_warn_unused simdjson_result simdjson_ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_builder.h" */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { @@ -41630,7 +45366,7 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start if (*_json_iter->peek() == '}') { logger::log_value(*_json_iter, "empty object"); _json_iter->return_current_and_advance(); - end_container(); + SIMDJSON_TRY(end_container()); return false; } return true; @@ -42484,7 +46220,7 @@ simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { _json_iter->ascend_to(depth()-1); } -simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { +simdjson_warn_unused simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { logger::log_start_value(*_json_iter, start_position(), depth(), type); // If we're not at the position anymore, we don't want to advance the cursor. const uint8_t *json; @@ -42646,7 +46382,7 @@ simdjson_inline simdjson_result value_iterator::type() const noexcept case '5': case '6': case '7': case '8': case '9': return json_type::number; default: - return TAPE_ERROR; + return json_type::unknown; } } @@ -42686,852 +46422,2873 @@ simdjson_inline simdjson_result::simdjson_resul #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H /* end file simdjson/generic/ondemand/value_iterator-inl.h for arm64 */ +// JSON builder inline definitions +/* including simdjson/generic/ondemand/json_string_builder-inl.h for arm64: #include "simdjson/generic/ondemand/json_string_builder-inl.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder-inl.h for arm64 */ +/** + * This file is part of the builder API. It is temporarily in the ondemand + * directory but we will move it to a builder directory later. + */ +#include +#include +#include +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_INL_H -/* end file simdjson/generic/ondemand/amalgamated.h for arm64 */ -/* including simdjson/arm64/end.h: #include "simdjson/arm64/end.h" */ -/* begin file simdjson/arm64/end.h */ /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT -/* undefining SIMDJSON_IMPLEMENTATION from "arm64" */ -#undef SIMDJSON_IMPLEMENTATION -/* end file simdjson/arm64/end.h */ - -#endif // SIMDJSON_ARM64_ONDEMAND_H -/* end file simdjson/arm64/ondemand.h */ -#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(fallback) -/* including simdjson/fallback/ondemand.h: #include "simdjson/fallback/ondemand.h" */ -/* begin file simdjson/fallback/ondemand.h */ -#ifndef SIMDJSON_FALLBACK_ONDEMAND_H -#define SIMDJSON_FALLBACK_ONDEMAND_H +/* + * Empirically, we have found that an inlined optimization is important for + * performance. The following macros are not ideal. We should find a better + * way to inline the code. + */ -/* including simdjson/fallback/begin.h: #include "simdjson/fallback/begin.h" */ -/* begin file simdjson/fallback/begin.h */ -/* defining SIMDJSON_IMPLEMENTATION to "fallback" */ -#define SIMDJSON_IMPLEMENTATION fallback -/* including simdjson/fallback/base.h: #include "simdjson/fallback/base.h" */ -/* begin file simdjson/fallback/base.h */ -#ifndef SIMDJSON_FALLBACK_BASE_H -#define SIMDJSON_FALLBACK_BASE_H +#if defined(__SSE2__) || defined(__x86_64__) || defined(__x86_64) || \ + (defined(_M_AMD64) || defined(_M_X64) || \ + (defined(_M_IX86_FP) && _M_IX86_FP == 2)) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#define SIMDJSON_EXPERIMENTAL_HAS_SSE2 1 +#endif +#endif -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#if defined(__aarch64__) || defined(_M_ARM64) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_NEON +#define SIMDJSON_EXPERIMENTAL_HAS_NEON 1 +#endif +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +#include +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#include +#endif namespace simdjson { -/** - * Fallback implementation (runs on any machine). - */ -namespace fallback { +namespace arm64 { +namespace builder { + +static SIMDJSON_CONSTEXPR_LAMBDA std::array + json_quotable_character = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; -class implementation; +/** -} // namespace fallback -} // namespace simdjson +A possible SWAR implementation of has_json_escapable_byte. It is not used +because it is slower than the current implementation. It is kept here for +reference (to show that we tried it). -#endif // SIMDJSON_FALLBACK_BASE_H -/* end file simdjson/fallback/base.h */ -/* including simdjson/fallback/bitmanipulation.h: #include "simdjson/fallback/bitmanipulation.h" */ -/* begin file simdjson/fallback/bitmanipulation.h */ -#ifndef SIMDJSON_FALLBACK_BITMANIPULATION_H -#define SIMDJSON_FALLBACK_BITMANIPULATION_H +inline bool has_json_escapable_byte(uint64_t x) { + uint64_t is_ascii = 0x8080808080808080ULL & ~x; + uint64_t xor2 = x ^ 0x0202020202020202ULL; + uint64_t lt32_or_eq34 = xor2 - 0x2121212121212121ULL; + uint64_t sub92 = x ^ 0x5C5C5C5C5C5C5C5CULL; + uint64_t eq92 = (sub92 - 0x0101010101010101ULL); + return ((lt32_or_eq34 | eq92) & is_ascii) != 0; +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +**/ -namespace simdjson { -namespace fallback { -namespace { +SIMDJSON_CONSTEXPR_LAMBDA simdjson_inline bool +simple_needs_escaping(std::string_view v) { + for (char c : v) { + // a table lookup is faster than a series of comparisons + if (json_quotable_character[static_cast(c)]) { + return true; + } + } + return false; +} -#if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64) -static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) { - unsigned long x0 = (unsigned long)x, top, bottom; - _BitScanForward(&top, (unsigned long)(x >> 32)); - _BitScanForward(&bottom, x0); - *ret = x0 ? bottom : 32 + top; - return x != 0; +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); + } + size_t i = 0; + uint8x16_t running = vdupq_n_u8(0); + uint8x16_t v34 = vdupq_n_u8(34); + uint8x16_t v92 = vdupq_n_u8(92); + + for (; i + 15 < view.size(); i += 16) { + uint8x16_t word = vld1q_u8((const uint8_t *)view.data() + i); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + if (i < view.size()) { + uint8x16_t word = + vld1q_u8((const uint8_t *)view.data() + view.length() - 16); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + return vmaxvq_u32(vreinterpretq_u32_u8(running)) != 0; +} +#elif SIMDJSON_EXPERIMENTAL_HAS_SSE2 +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); + } + size_t i = 0; + __m128i running = _mm_setzero_si128(); + for (; i + 15 < view.size(); i += 16) { + + __m128i word = + _mm_loadu_si128(reinterpret_cast(view.data() + i)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + if (i < view.size()) { + __m128i word = _mm_loadu_si128( + reinterpret_cast(view.data() + view.length() - 16)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + return _mm_movemask_epi8(running) != 0; } -static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) { - unsigned long x1 = (unsigned long)(x >> 32), top, bottom; - _BitScanReverse(&top, x1); - _BitScanReverse(&bottom, (unsigned long)x); - *ret = x1 ? top + 32 : bottom; - return x != 0; +#else +simdjson_inline bool fast_needs_escaping(std::string_view view) { + return simple_needs_escaping(view); } #endif -/* result might be undefined when input_num is zero */ -simdjson_inline int leading_zeroes(uint64_t input_num) { -#ifdef _MSC_VER - unsigned long leading_zero = 0; - // Search the mask data from most significant bit (MSB) - // to least significant bit (LSB) for a set bit (1). - if (_BitScanReverse64(&leading_zero, input_num)) - return (int)(63 - leading_zero); - else - return 64; -#else - return __builtin_clzll(input_num); -#endif// _MSC_VER +SIMDJSON_CONSTEXPR_LAMBDA inline size_t +find_next_json_quotable_character(const std::string_view view, + size_t location) noexcept { + + for (auto pos = view.begin() + location; pos != view.end(); ++pos) { + if (json_quotable_character[static_cast(*pos)]) { + return pos - view.begin(); + } + } + return size_t(view.size()); } -} // unnamed namespace -} // namespace fallback -} // namespace simdjson +SIMDJSON_CONSTEXPR_LAMBDA static std::string_view control_chars[] = { + "\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", "\\u0006", + "\\u0007", "\\b", "\\t", "\\n", "\\u000b", "\\f", "\\r", + "\\u000e", "\\u000f", "\\u0010", "\\u0011", "\\u0012", "\\u0013", "\\u0014", + "\\u0015", "\\u0016", "\\u0017", "\\u0018", "\\u0019", "\\u001a", "\\u001b", + "\\u001c", "\\u001d", "\\u001e", "\\u001f"}; -#endif // SIMDJSON_FALLBACK_BITMANIPULATION_H -/* end file simdjson/fallback/bitmanipulation.h */ -/* including simdjson/fallback/stringparsing_defs.h: #include "simdjson/fallback/stringparsing_defs.h" */ -/* begin file simdjson/fallback/stringparsing_defs.h */ -#ifndef SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H -#define SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H +// All Unicode characters may be placed within the quotation marks, except for +// the characters that MUST be escaped: quotation mark, reverse solidus, and the +// control characters (U+0000 through U+001F). There are two-character sequence +// escape representations of some popular characters: +// \", \\, \b, \f, \n, \r, \t. +SIMDJSON_CONSTEXPR_LAMBDA void escape_json_char(char c, char *&out) { + if (c == '"') { + memcpy(out, "\\\"", 2); + out += 2; + } else if (c == '\\') { + memcpy(out, "\\\\", 2); + out += 2; + } else { + std::string_view v = control_chars[uint8_t(c)]; + memcpy(out, v.data(), v.size()); + out += v.size(); + } +} + +inline size_t write_string_escaped(const std::string_view input, char *out) { + size_t mysize = input.size(); + if (!fast_needs_escaping(input)) { // fast path! + memcpy(out, input.data(), input.size()); + return input.size(); + } + const char *const initout = out; + size_t location = find_next_json_quotable_character(input, 0); + memcpy(out, input.data(), location); + out += location; + escape_json_char(input[location], out); + location += 1; + while (location < mysize) { + size_t newlocation = find_next_json_quotable_character(input, location); + memcpy(out, input.data() + location, newlocation - location); + out += newlocation - location; + location = newlocation; + if (location == mysize) { + break; + } + escape_json_char(input[location], out); + location += 1; + } + return out - initout; +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +simdjson_inline string_builder::string_builder(size_t initial_capacity) + : buffer(new(std::nothrow) char[initial_capacity]), position(0), + capacity(buffer.get() != nullptr ? initial_capacity : 0), + is_valid(buffer.get() != nullptr) {} -namespace simdjson { -namespace fallback { -namespace { +simdjson_inline bool string_builder::capacity_check(size_t upcoming_bytes) { + // We use the convention that when is_valid is false, then the capacity and + // the position are 0. + // Most of the time, this function will return true. + if (simdjson_likely(upcoming_bytes <= capacity - position)) { + return true; + } + // check for overflow, most of the time there is no overflow + if (simdjson_likely(position + upcoming_bytes < position)) { + return false; + } + // We will rarely get here. + grow_buffer((std::max)(capacity * 2, position + upcoming_bytes)); + // If the buffer allocation failed, we set is_valid to false. + return is_valid; +} -// Holds backslashes and quotes locations. -struct backslash_and_quote { -public: - static constexpr uint32_t BYTES_PROCESSED = 1; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); +simdjson_inline void string_builder::grow_buffer(size_t desired_capacity) { + if (!is_valid) { + return; + } + std::unique_ptr new_buffer(new (std::nothrow) char[desired_capacity]); + if (new_buffer.get() == nullptr) { + set_valid(false); + return; + } + std::memcpy(new_buffer.get(), buffer.get(), position); + buffer.swap(new_buffer); + capacity = desired_capacity; +} - simdjson_inline bool has_quote_first() { return c == '"'; } - simdjson_inline bool has_backslash() { return c == '\\'; } - simdjson_inline int quote_index() { return c == '"' ? 0 : 1; } - simdjson_inline int backslash_index() { return c == '\\' ? 0 : 1; } +simdjson_inline void string_builder::set_valid(bool valid) noexcept { + if (!valid) { + is_valid = false; + capacity = 0; + position = 0; + buffer.reset(); + } else { + is_valid = true; + } +} - uint8_t c; -}; // struct backslash_and_quote +simdjson_inline size_t string_builder::size() const noexcept { + return position; +} -simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { - // store to dest unconditionally - we can overwrite the bits we don't like later - dst[0] = src[0]; - return { src[0] }; +simdjson_inline void string_builder::append(char c) noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = c; + } } -} // unnamed namespace -} // namespace fallback -} // namespace simdjson +simdjson_inline void string_builder::append_null() noexcept { + constexpr char null_literal[] = "null"; + constexpr size_t null_len = sizeof(null_literal) - 1; + if (capacity_check(null_len)) { + std::memcpy(buffer.get() + position, null_literal, null_len); + position += null_len; + } +} -#endif // SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H -/* end file simdjson/fallback/stringparsing_defs.h */ -/* including simdjson/fallback/numberparsing_defs.h: #include "simdjson/fallback/numberparsing_defs.h" */ -/* begin file simdjson/fallback/numberparsing_defs.h */ -#ifndef SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H -#define SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H +simdjson_inline void string_builder::clear() noexcept { + position = 0; + // if it was invalid, we should try to repair it + if (!is_valid) { + capacity = 0; + buffer.reset(); + is_valid = true; + } +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +namespace internal { -#include +template ::value>::type> +simdjson_really_inline int int_log2(number_type x) { + return 63 - leading_zeroes(uint64_t(x) | 1); +} + +simdjson_really_inline int fast_digit_count_32(uint32_t x) { + static uint64_t table[] = { + 4294967296, 8589934582, 8589934582, 8589934582, 12884901788, + 12884901788, 12884901788, 17179868184, 17179868184, 17179868184, + 21474826480, 21474826480, 21474826480, 21474826480, 25769703776, + 25769703776, 25769703776, 30063771072, 30063771072, 30063771072, + 34349738368, 34349738368, 34349738368, 34349738368, 38554705664, + 38554705664, 38554705664, 41949672960, 41949672960, 41949672960, + 42949672960, 42949672960}; + return uint32_t((x + table[int_log2(x)]) >> 32); +} + +simdjson_really_inline int fast_digit_count_64(uint64_t x) { + static uint64_t table[] = {9, + 99, + 999, + 9999, + 99999, + 999999, + 9999999, + 99999999, + 999999999, + 9999999999, + 99999999999, + 999999999999, + 9999999999999, + 99999999999999, + 999999999999999ULL, + 9999999999999999ULL, + 99999999999999999ULL, + 999999999999999999ULL, + 9999999999999999999ULL}; + int y = (19 * int_log2(x) >> 6); + y += x > table[y]; + return y + 1; +} + +template ::value>::type> +simdjson_really_inline size_t digit_count(number_type v) noexcept { + static_assert(sizeof(number_type) == 8 || sizeof(number_type) == 4 || + sizeof(number_type) == 2 || sizeof(number_type) == 1, + "We only support 8-bit, 16-bit, 32-bit and 64-bit numbers"); + SIMDJSON_IF_CONSTEXPR(sizeof(number_type) <= 4) { + return fast_digit_count_32(static_cast(v)); + } + else { + return fast_digit_count_64(static_cast(v)); + } +} +static const char decimal_table[200] = { + 0x30, 0x30, 0x30, 0x31, 0x30, 0x32, 0x30, 0x33, 0x30, 0x34, 0x30, 0x35, + 0x30, 0x36, 0x30, 0x37, 0x30, 0x38, 0x30, 0x39, 0x31, 0x30, 0x31, 0x31, + 0x31, 0x32, 0x31, 0x33, 0x31, 0x34, 0x31, 0x35, 0x31, 0x36, 0x31, 0x37, + 0x31, 0x38, 0x31, 0x39, 0x32, 0x30, 0x32, 0x31, 0x32, 0x32, 0x32, 0x33, + 0x32, 0x34, 0x32, 0x35, 0x32, 0x36, 0x32, 0x37, 0x32, 0x38, 0x32, 0x39, + 0x33, 0x30, 0x33, 0x31, 0x33, 0x32, 0x33, 0x33, 0x33, 0x34, 0x33, 0x35, + 0x33, 0x36, 0x33, 0x37, 0x33, 0x38, 0x33, 0x39, 0x34, 0x30, 0x34, 0x31, + 0x34, 0x32, 0x34, 0x33, 0x34, 0x34, 0x34, 0x35, 0x34, 0x36, 0x34, 0x37, + 0x34, 0x38, 0x34, 0x39, 0x35, 0x30, 0x35, 0x31, 0x35, 0x32, 0x35, 0x33, + 0x35, 0x34, 0x35, 0x35, 0x35, 0x36, 0x35, 0x37, 0x35, 0x38, 0x35, 0x39, + 0x36, 0x30, 0x36, 0x31, 0x36, 0x32, 0x36, 0x33, 0x36, 0x34, 0x36, 0x35, + 0x36, 0x36, 0x36, 0x37, 0x36, 0x38, 0x36, 0x39, 0x37, 0x30, 0x37, 0x31, + 0x37, 0x32, 0x37, 0x33, 0x37, 0x34, 0x37, 0x35, 0x37, 0x36, 0x37, 0x37, + 0x37, 0x38, 0x37, 0x39, 0x38, 0x30, 0x38, 0x31, 0x38, 0x32, 0x38, 0x33, + 0x38, 0x34, 0x38, 0x35, 0x38, 0x36, 0x38, 0x37, 0x38, 0x38, 0x38, 0x39, + 0x39, 0x30, 0x39, 0x31, 0x39, 0x32, 0x39, 0x33, 0x39, 0x34, 0x39, 0x35, + 0x39, 0x36, 0x39, 0x37, 0x39, 0x38, 0x39, 0x39, +}; +} // namespace internal -#ifdef JSON_TEST_NUMBERS // for unit testing -void found_invalid_number(const uint8_t *buf); -void found_integer(int64_t result, const uint8_t *buf); -void found_unsigned_integer(uint64_t result, const uint8_t *buf); -void found_float(double result, const uint8_t *buf); -#endif +template +simdjson_inline void string_builder::append(number_type v) noexcept { + static_assert(std::is_same::value || + std::is_integral::value || + std::is_floating_point::value, + "Unsupported number type"); + // If C++17 is available, we can 'if constexpr' here. + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + if (v) { + constexpr char true_literal[] = "true"; + constexpr size_t true_len = sizeof(true_literal) - 1; + if (capacity_check(true_len)) { + std::memcpy(buffer.get() + position, true_literal, true_len); + position += true_len; + } + } else { + constexpr char false_literal[] = "false"; + constexpr size_t false_len = sizeof(false_literal) - 1; + if (capacity_check(false_len)) { + std::memcpy(buffer.get() + position, false_literal, false_len); + position += false_len; + } + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_unsigned::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + unsigned_type pv = static_cast(v); + size_t dc = internal::digit_count(pv); + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100) * 2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_integral::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + bool negative = v < 0; + unsigned_type pv = static_cast(v); + if (negative) { + pv = 0 - pv; // the 0 is for Microsoft + } + size_t dc = internal::digit_count(pv); + // by always writing the minus sign, we avoid the branch. + buffer.get()[position] = '-'; + position += negative ? 1 : 0; + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100) * 2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_floating_point::value) { + constexpr size_t max_number_size = 24; + if (capacity_check(max_number_size)) { + // We could specialize for float. + char *end = simdjson::internal::to_chars(buffer.get() + position, nullptr, + double(v)); + position = end - buffer.get(); + } + } +} -namespace simdjson { -namespace fallback { -namespace numberparsing { +simdjson_inline void +string_builder::escape_and_append(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(6 * input.size())) { + position += write_string_escaped(input, buffer.get() + position); + } +} -// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ -/** @private */ -static simdjson_inline uint32_t parse_eight_digits_unrolled(const char *chars) { - uint64_t val; - memcpy(&val, chars, sizeof(uint64_t)); - val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; - val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; - return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +simdjson_inline void +string_builder::escape_and_append_with_quotes(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * input.size())) { + buffer.get()[position++] = '"'; + position += write_string_escaped(input, buffer.get() + position); + buffer.get()[position++] = '"'; + } } -/** @private */ -static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { - return parse_eight_digits_unrolled(reinterpret_cast(chars)); +simdjson_inline void +string_builder::escape_and_append_with_quotes(char input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * 1)) { + buffer.get()[position++] = '"'; + std::string_view cinput(&input, 1); + position += write_string_escaped(cinput, buffer.get() + position); + buffer.get()[position++] = '"'; + } } -#if SIMDJSON_IS_32BITS // _umul128 for x86, arm -// this is a slow emulation routine for 32-bit -// -static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { - return x * (uint64_t)y; +simdjson_inline void +string_builder::escape_and_append_with_quotes(const char *input) noexcept { + std::string_view cinput(input); + escape_and_append_with_quotes(cinput); } -static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { - uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); - uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); - uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); - uint64_t adbc_carry = !!(adbc < ad); - uint64_t lo = bd + (adbc << 32); - *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + - (adbc_carry << 32) + !!(lo < bd); - return lo; +#if SIMDJSON_SUPPORTS_CONCEPTS +template +simdjson_inline void string_builder::escape_and_append_with_quotes() noexcept { + escape_and_append_with_quotes(constevalutil::string_constant::value); } #endif -/** @private */ -simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { - internal::value128 answer; -#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS -#if SIMDJSON_IS_ARM64 - // ARM64 has native support for 64-bit multiplications, no need to emultate - answer.high = __umulh(value1, value2); - answer.low = value1 * value2; -#else - answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // SIMDJSON_IS_ARM64 -#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS - __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; - answer.low = uint64_t(r); - answer.high = uint64_t(r >> 64); -#endif - return answer; +simdjson_inline void string_builder::append_raw(const char *c) noexcept { + size_t len = std::strlen(c); + append_raw(c, len); } -} // namespace numberparsing -} // namespace fallback -} // namespace simdjson +simdjson_inline void +string_builder::append_raw(std::string_view input) noexcept { + if (capacity_check(input.size())) { + std::memcpy(buffer.get() + position, input.data(), input.size()); + position += input.size(); + } +} -#ifndef SIMDJSON_SWAR_NUMBER_PARSING -#if SIMDJSON_IS_BIG_ENDIAN -#define SIMDJSON_SWAR_NUMBER_PARSING 0 -#else -#define SIMDJSON_SWAR_NUMBER_PARSING 1 -#endif -#endif +simdjson_inline void string_builder::append_raw(const char *str, + size_t len) noexcept { + if (capacity_check(len)) { + std::memcpy(buffer.get() + position, str, len); + position += len; + } +} +#if SIMDJSON_SUPPORTS_CONCEPTS +// Support for optional types (std::optional, etc.) +template + requires(!require_custom_serialization) +simdjson_inline void string_builder::append(const T &opt) { + if (opt) { + append(*opt); + } else { + append_null(); + } +} -#endif // SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H -/* end file simdjson/fallback/numberparsing_defs.h */ -/* end file simdjson/fallback/begin.h */ -/* including simdjson/generic/ondemand/amalgamated.h for fallback: #include "simdjson/generic/ondemand/amalgamated.h" */ -/* begin file simdjson/generic/ondemand/amalgamated.h for fallback */ -#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H) -#error simdjson/generic/ondemand/dependencies.h must be included before simdjson/generic/ondemand/amalgamated.h! -#endif +template + requires(require_custom_serialization) +simdjson_inline void string_builder::append(T &&val) { + serialize(*this, std::forward(val)); +} -// Stuff other things depend on -/* including simdjson/generic/ondemand/base.h for fallback: #include "simdjson/generic/ondemand/base.h" */ -/* begin file simdjson/generic/ondemand/base.h for fallback */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_BASE_H +template + requires(std::is_convertible::value || + std::is_same::value) +simdjson_inline void string_builder::append(const T &value) { + escape_and_append_with_quotes(value); +} +#endif -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_BASE_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS +// Support for range-based appending (std::ranges::view, etc.) +template + requires(!std::is_convertible::value && !require_custom_serialization) +simdjson_inline void string_builder::append(const R &range) noexcept { + auto it = std::ranges::begin(range); + auto end = std::ranges::end(range); + if constexpr (concepts::is_pair>) { + start_object(); -namespace simdjson { -namespace fallback { -/** - * A fast, simple, DOM-like interface that parses JSON as you use it. - * - * Designed for maximum speed and a lower memory profile. - */ -namespace ondemand { + if (it == end) { + end_object(); + return; // Handle empty range + } + // Append first item without leading comma + append_key_value(it->first, it->second); + ++it; -/** Represents the depth of a JSON value (number of nested arrays/objects). */ -using depth_t = int32_t; + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append_key_value(it->first, it->second); + } + end_object(); + } else { + start_array(); + if (it == end) { + end_array(); + return; // Handle empty range + } -/** @copydoc simdjson::fallback::number_type */ -using number_type = simdjson::fallback::number_type; + // Append first item without leading comma + append(*it); + ++it; -/** @private Position in the JSON buffer indexes */ -using token_position = const uint32_t *; + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append(*it); + } + end_array(); + } +} -class array; -class array_iterator; -class document; -class document_reference; -class document_stream; -class field; -class json_iterator; -enum class json_type; -struct number; -class object; -class object_iterator; -class parser; -class raw_json_string; -class token_iterator; -class value; -class value_iterator; +#endif -} // namespace ondemand -} // namespace fallback -} // namespace simdjson +#if SIMDJSON_EXCEPTIONS +simdjson_inline string_builder::operator std::string() const noexcept(false) { + return std::string(operator std::string_view()); +} -#endif // SIMDJSON_GENERIC_ONDEMAND_BASE_H -/* end file simdjson/generic/ondemand/base.h for fallback */ -/* including simdjson/generic/ondemand/deserialize.h for fallback: #include "simdjson/generic/ondemand/deserialize.h" */ -/* begin file simdjson/generic/ondemand/deserialize.h for fallback */ -#if SIMDJSON_SUPPORTS_DESERIALIZATION +simdjson_inline string_builder::operator std::string_view() const + noexcept(false) simdjson_lifetime_bound { + return view(); +} +#endif -#ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_ONDEMAND_DESERIALIZE_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +simdjson_inline simdjson_result +string_builder::view() const noexcept { + if (!is_valid) { + return simdjson::OUT_OF_CAPACITY; + } + return std::string_view(buffer.get(), position); +} -#include -namespace simdjson { +simdjson_inline simdjson_result string_builder::c_str() noexcept { + if (capacity_check(1)) { + buffer.get()[position] = '\0'; + return buffer.get(); + } + return simdjson::OUT_OF_CAPACITY; +} -namespace tag_invoke_fn_ns { -void tag_invoke(); +simdjson_inline bool string_builder::validate_unicode() const noexcept { + return simdjson::validate_utf8(buffer.get(), position); +} -struct tag_invoke_fn { - template - requires requires(Tag tag, Args &&...args) { - tag_invoke(std::forward(tag), std::forward(args)...); - } - constexpr auto operator()(Tag tag, Args &&...args) const - noexcept(noexcept(tag_invoke(std::forward(tag), - std::forward(args)...))) - -> decltype(tag_invoke(std::forward(tag), - std::forward(args)...)) { - return tag_invoke(std::forward(tag), std::forward(args)...); +simdjson_inline void string_builder::start_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '{'; } -}; -} // namespace tag_invoke_fn_ns +} -inline namespace tag_invoke_ns { -inline constexpr tag_invoke_fn_ns::tag_invoke_fn tag_invoke = {}; -} // namespace tag_invoke_ns +simdjson_inline void string_builder::end_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '}'; + } +} -template -concept tag_invocable = requires(Tag tag, Args... args) { - tag_invoke(std::forward(tag), std::forward(args)...); -}; +simdjson_inline void string_builder::start_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '['; + } +} -template -concept nothrow_tag_invocable = - tag_invocable && requires(Tag tag, Args... args) { - { - tag_invoke(std::forward(tag), std::forward(args)...) - } noexcept; - }; +simdjson_inline void string_builder::end_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ']'; + } +} -template -using tag_invoke_result = - std::invoke_result; +simdjson_inline void string_builder::append_comma() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ','; + } +} -template -using tag_invoke_result_t = - std::invoke_result_t; +simdjson_inline void string_builder::append_colon() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ':'; + } +} -template using tag_t = std::decay_t; +template +simdjson_inline void +string_builder::append_key_value(key_type key, value_type value) noexcept { + static_assert(std::is_same::value || + std::is_convertible::value, + "Unsupported key type"); + escape_and_append_with_quotes(key); + append_colon(); + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + append_null(); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR( + std::is_convertible::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else { + append(value); + } +} +#if SIMDJSON_SUPPORTS_CONCEPTS +template +simdjson_inline void +string_builder::append_key_value(value_type value) noexcept { + escape_and_append_with_quotes(); + append_colon(); + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + append_null(); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR( + std::is_convertible::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else { + append(value); + } +} +#endif -struct deserialize_tag; +} // namespace builder +} // namespace arm64 +} // namespace simdjson -/// These types are deserializable in a built-in way -template struct is_builtin_deserializable : std::false_type {}; -template <> struct is_builtin_deserializable : std::true_type {}; -template <> struct is_builtin_deserializable : std::true_type {}; -template <> struct is_builtin_deserializable : std::true_type {}; -template <> struct is_builtin_deserializable : std::true_type {}; -template <> struct is_builtin_deserializable : std::true_type {}; -template <> struct is_builtin_deserializable : std::true_type {}; -template <> struct is_builtin_deserializable : std::true_type {}; -template <> struct is_builtin_deserializable : std::true_type {}; -template <> struct is_builtin_deserializable : std::true_type {}; +#endif // SIMDJSON_GENERIC_STRING_BUILDER_INL_H +/* end file simdjson/generic/ondemand/json_string_builder-inl.h for arm64 */ +/* including simdjson/generic/ondemand/json_builder.h for arm64: #include "simdjson/generic/ondemand/json_builder.h" */ +/* begin file simdjson/generic/ondemand/json_builder.h for arm64 */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_BUILDER_H -template -concept is_builtin_deserializable_v = is_builtin_deserializable::value; +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/concepts.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#if SIMDJSON_STATIC_REFLECTION -template -concept custom_deserializable = tag_invocable; +#include +#include +#include +#include +#include +#include +#include +#include +// #include // for std::define_static_string - header not available yet -template -concept deserializable = custom_deserializable || is_builtin_deserializable_v; +namespace simdjson { +namespace arm64 { +namespace builder { -template -concept nothrow_custom_deserializable = nothrow_tag_invocable; +template + requires(concepts::container_but_not_string && !require_custom_serialization) +constexpr void atom(string_builder &b, const T &t) { + auto it = t.begin(); + auto end = t.end(); + if (it == end) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, *it); + ++it; + for (; it != end; ++it) { + b.append(','); + atom(b, *it); + } + b.append(']'); +} -// built-in types are noexcept and if an error happens, the value simply gets ignored and the error is returned. -template -concept nothrow_deserializable = nothrow_custom_deserializable || is_builtin_deserializable_v; +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +constexpr void atom(string_builder &b, const T &t) { + b.escape_and_append_with_quotes(t); +} -/// Deserialize Tag -inline constexpr struct deserialize_tag { - using value_type = fallback::ondemand::value; - using document_type = fallback::ondemand::document; - using document_reference_type = fallback::ondemand::document_reference; +template + requires(!require_custom_serialization) +constexpr void atom(string_builder &b, const T &m) { + if (m.empty()) { + b.append_raw("{}"); + return; + } + b.append('{'); + bool first = true; + for (const auto& [key, value] : m) { + if (!first) { + b.append(','); + } + first = false; + // Keys must be convertible to string_view per the concept + b.escape_and_append_with_quotes(key); + b.append(':'); + atom(b, value); + } + b.append('}'); +} - // Customization Point for value - template - requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { - return tag_invoke(*this, object, output); + +template::value && !std::is_same_v>::type> +constexpr void atom(string_builder &b, const number_type t) { + b.append(t); +} + +template + requires(std::is_class_v && !concepts::container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && !require_custom_serialization) +constexpr void atom(string_builder &b, const T &t) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, t.[:dm:]); + i++; + }; + b.append('}'); +} + +// Support for optional types (std::optional, etc.) +template + requires(!require_custom_serialization) +constexpr void atom(string_builder &b, const T &opt) { + if (opt) { + atom(b, opt.value()); + } else { + b.append_raw("null"); } +} - // Customization Point for document - template - requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { - return tag_invoke(*this, object, output); +// Support for smart pointers (std::unique_ptr, std::shared_ptr, etc.) +template + requires(!require_custom_serialization) +constexpr void atom(string_builder &b, const T &ptr) { + if (ptr) { + atom(b, *ptr); + } else { + b.append_raw("null"); } +} - // Customization Point for document reference - template - requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { - return tag_invoke(*this, object, output); +// Support for enums - serialize as string representation using expand approach from P2996R12 +template + requires(std::is_enum_v && !require_custom_serialization) +void atom(string_builder &b, const T &e) { +#if SIMDJSON_STATIC_REFLECTION + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + constexpr auto enum_str = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(enum_val))); + if (e == [:enum_val:]) { + b.append_raw(enum_str); + return; + } + }; + // Fallback to integer if enum value not found + atom(b, static_cast>(e)); +#else + // Fallback: serialize as integer if reflection not available + atom(b, static_cast>(e)); +#endif +} + +// Support for appendable containers that don't have operator[] (sets, etc.) +template + requires(!concepts::container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v && !require_custom_serialization) +constexpr void atom(string_builder &b, const T &container) { + if (container.empty()) { + b.append_raw("[]"); + return; + } + b.append('['); + bool first = true; + for (const auto& item : container) { + if (!first) { + b.append(','); + } + first = false; + atom(b, item); } + b.append(']'); +} +// append functions that delegate to atom functions for primitive types +template + requires(std::is_arithmetic_v && !std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); +} -} deserialize{}; +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); +} -} // namespace simdjson +template + requires(!require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); +} -#endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +template + requires(!require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); +} -/* end file simdjson/generic/ondemand/deserialize.h for fallback */ -/* including simdjson/generic/ondemand/value_iterator.h for fallback: #include "simdjson/generic/ondemand/value_iterator.h" */ -/* begin file simdjson/generic/ondemand/value_iterator.h for fallback */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H +template + requires(!concepts::container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v && !require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +template + requires(!require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +// works for struct +template + requires(std::is_class_v && !concepts::container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && !require_custom_serialization) +void append(string_builder &b, const Z &z) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^Z, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, z.[:dm:]); + i++; + }; + b.append('}'); +} -namespace simdjson { -namespace fallback { -namespace ondemand { +// works for container that have begin() and end() iterators +template + requires(concepts::container_but_not_string && !require_custom_serialization) +void append(string_builder &b, const Z &z) { + auto it = z.begin(); + auto end = z.end(); + if (it == end) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, *it); + ++it; + for (; it != end; ++it) { + b.append(','); + atom(b, *it); + } + b.append(']'); +} -/** - * Iterates through a single JSON value at a particular depth. - * - * Does not keep track of the type of value: provides methods for objects, arrays and scalars and expects - * the caller to call the right ones. - * - * @private This is not intended for external use. - */ -class value_iterator { -protected: - /** The underlying JSON iterator */ - json_iterator *_json_iter{}; - /** The depth of this value */ - depth_t _depth{}; - /** - * The starting token index for this value - */ - token_position _start_position{}; +template + requires (require_custom_serialization) +void append(string_builder &b, const Z &z) { + b.append(z); +} -public: - simdjson_inline value_iterator() noexcept = default; - /** - * Denote that we're starting a document. - */ - simdjson_inline void start_document() noexcept; +template +simdjson_warn_unused simdjson_result to_json_string(const Z &z, size_t initial_capacity = string_builder::DEFAULT_INITIAL_CAPACITY) { + string_builder b(initial_capacity); + append(b, z); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); +} - /** - * Skips a non-iterated or partially-iterated JSON value, whether it is a scalar, array or object. - * - * Optimized for scalars. - */ - simdjson_warn_unused simdjson_inline error_code skip_child() noexcept; +template +simdjson_warn_unused simdjson_error to_json(const Z &z, std::string &s, size_t initial_capacity = string_builder::DEFAULT_INITIAL_CAPACITY) { + string_builder b(initial_capacity); + append(b, z); + std::string_view view; + if(auto e = b.view().get(view); e) { return e; } + s.assign(view); + return SUCCESS; +} - /** - * Tell whether the iterator is at the EOF mark - */ - simdjson_inline bool at_end() const noexcept; +template +string_builder& operator<<(string_builder& b, const Z& z) { + append(b, z); + return b; +} - /** - * Tell whether the iterator is at the start of the value - */ - simdjson_inline bool at_start() const noexcept; +// extract_from: Serialize only specific fields from a struct to JSON +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +void extract_from(string_builder &b, const T &obj) { + // Helper to check if a field name matches any of the requested fields + auto should_extract = [](std::string_view field_name) constexpr -> bool { + return ((FieldNames.view() == field_name) || ...); + }; - /** - * Tell whether the value is open--if the value has not been used, or the array/object is still open. - */ - simdjson_inline bool is_open() const noexcept; + b.append('{'); + bool first = true; - /** - * Tell whether the value is at an object's first field (just after the {). - */ - simdjson_inline bool at_first_field() const noexcept; + // Iterate through all members of T using reflection + template for (constexpr auto mem : std::define_static_array( + std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { - /** - * Abandon all iteration. - */ - simdjson_inline void abandon() noexcept; + if constexpr (std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); - /** - * Get the child value as a value_iterator. - */ - simdjson_inline value_iterator child_value() const noexcept; + // Only serialize this field if it's in our list of requested fields + if constexpr (should_extract(key)) { + if (!first) { + b.append(','); + } + first = false; - /** - * Get the depth of this value. - */ - simdjson_inline int32_t depth() const noexcept; + // Serialize the key + constexpr auto quoted_key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(mem))); + b.append_raw(quoted_key); + b.append(':'); - /** - * Get the JSON type of this value. - * - * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". - */ - simdjson_inline simdjson_result type() const noexcept; + // Serialize the value + atom(b, obj.[:mem:]); + } + } + }; - /** - * @addtogroup object Object iteration - * - * Methods to iterate and find object fields. These methods generally *assume* the value is - * actually an object; the caller is responsible for keeping track of that fact. - * - * @{ - */ + b.append('}'); +} - /** - * Start an object iteration. - * - * @returns Whether the object had any fields (returns false for empty). - * @error INCORRECT_TYPE if there is no opening { - */ - simdjson_warn_unused simdjson_inline simdjson_result start_object() noexcept; - /** - * Start an object iteration from the root. - * - * @returns Whether the object had any fields (returns false for empty). - * @error INCORRECT_TYPE if there is no opening { - * @error TAPE_ERROR if there is no matching } at end of document - */ - simdjson_warn_unused simdjson_inline simdjson_result start_root_object() noexcept; - /** - * Checks whether an object could be started from the root. May be called by start_root_object. - * - * @returns SUCCESS if it is possible to safely start an object from the root (document level). - * @error INCORRECT_TYPE if there is no opening { - * @error TAPE_ERROR if there is no matching } at end of document - */ - simdjson_warn_unused simdjson_inline error_code check_root_object() noexcept; - /** - * Start an object iteration after the user has already checked and moved past the {. - * - * Does not move the iterator unless the object is empty ({}). - * - * @returns Whether the object had any fields (returns false for empty). - * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that - * invalidates the document. - */ - simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; - /** - * Start an object iteration from the root, after the user has already checked and moved past the {. - * - * Does not move the iterator unless the object is empty ({}). - * - * @returns Whether the object had any fields (returns false for empty). - * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that - * invalidates the document. - */ - simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_result extract_from(const T &obj, size_t initial_capacity = string_builder::DEFAULT_INITIAL_CAPACITY) { + string_builder b(initial_capacity); + extract_from(b, obj); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); +} - /** - * Moves to the next field in an object. - * - * Looks for , and }. If } is found, the object is finished and the iterator advances past it. - * Otherwise, it advances to the next value. - * - * @return whether there is another field in the object. - * @error TAPE_ERROR If there is a comma missing between fields. - * @error TAPE_ERROR If there is a comma, but not enough tokens remaining to have a key, :, and value. - */ - simdjson_warn_unused simdjson_inline simdjson_result has_next_field() noexcept; +} // namespace builder +} // namespace arm64 +// Alias the function template to 'to' in the global namespace +template +simdjson_warn_unused simdjson_result to_json(const Z &z, size_t initial_capacity = arm64::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + arm64::builder::string_builder b(initial_capacity); + arm64::builder::append(b, z); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); +} +template +simdjson_warn_unused simdjson_error to_json(const Z &z, std::string &s, size_t initial_capacity = arm64::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + arm64::builder::string_builder b(initial_capacity); + arm64::builder::append(b, z); + std::string_view view; + if(auto e = b.view().get(view); e) { return e; } + s.assign(view); + return SUCCESS; +} +// Global namespace function for extract_from +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_result extract_from(const T &obj, size_t initial_capacity = arm64::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + arm64::builder::string_builder b(initial_capacity); + arm64::builder::extract_from(b, obj); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); +} - /** - * Get the current field's key. - */ - simdjson_warn_unused simdjson_inline simdjson_result field_key() noexcept; +} // namespace simdjson - /** - * Pass the : in the field and move to its value. - */ - simdjson_warn_unused simdjson_inline error_code field_value() noexcept; +#endif // SIMDJSON_STATIC_REFLECTION - /** - * Find the next field with the given key. - * - * Assumes you have called next_field() or otherwise matched the previous value. - * - * This means the iterator must be sitting at the next key: - * - * ``` - * { "a": 1, "b": 2 } - * ^ - * ``` - * - * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to - * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may - * fail to match some keys with escapes (\u, \n, etc.). - */ - simdjson_warn_unused simdjson_inline error_code find_field(const std::string_view key) noexcept; +#endif +/* end file simdjson/generic/ondemand/json_builder.h for arm64 */ - /** - * Find the next field with the given key, *without* unescaping. This assumes object order: it - * will not find the field if it was already passed when looking for some *other* field. - * - * Assumes you have called next_field() or otherwise matched the previous value. - * - * This means the iterator must be sitting at the next key: - * - * ``` - * { "a": 1, "b": 2 } - * ^ - * ``` - * - * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to - * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may - * fail to match some keys with escapes (\u, \n, etc.). - */ - simdjson_warn_unused simdjson_inline simdjson_result find_field_raw(const std::string_view key) noexcept; +// JSON path accessor (compile-time) - must be after inline definitions +/* including simdjson/generic/ondemand/compile_time_accessors.h for arm64: #include "simdjson/generic/ondemand/compile_time_accessors.h" */ +/* begin file simdjson/generic/ondemand/compile_time_accessors.h for arm64 */ +/** + * Compile-time JSON Path and JSON Pointer accessors using C++26 reflection (P2996) + * + * This file validates JSON paths/pointers against struct definitions at compile time + * and generates optimized accessor code with zero runtime overhead. + * + * ## How It Works + * + * **Compile Time**: Path is parsed, validated against struct, types are checked + * **Runtime**: Direct navigation with no parsing or validation overhead + * + * Example: + * ```cpp + * struct User { std::string name; std::vector emails; }; + * + * std::string email; + * path_accessor::extract_field(doc, email); + * + * // Compile time validates: + * // 1. User has "emails" field + * // 2. "emails" is array-like + * // 3. Element type is std::string + * // 4. static_assert(^^std::string == ^^std::string) + * + * // Runtime just navigates: + * // doc.get_object().find_field("emails").get_array().at(0).get(email) + * ``` + * + * ## Key Reflection APIs + * + * - `^^Type`: Reflect operator, converts type to std::meta::info + * - `std::meta::nonstatic_data_members_of(type)`: Get all fields of a struct + * - `std::meta::identifier_of(member)`: Get field name as string_view + * - `std::meta::type_of(member)`: Get reflected type of a field + * - `std::meta::is_array_type(type)`: Check if C-style array + * - `std::meta::remove_extent(array)`: Extract element type from array + * - `std::meta::members_of(type)`: Get all members including typedefs + * - `std::meta::is_type(member)`: Check if member is a type (vs field) + * + * All operations execute at compile time in consteval contexts. + */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_COMPILE_TIME_ACCESSORS_H - /** - * Find the field with the given key without regard to order, and *without* unescaping. - * - * This is an unordered object lookup: if the field is not found initially, it will cycle around and scan from the beginning. - * - * Assumes you have called next_field() or otherwise matched the previous value. - * - * This means the iterator must be sitting at the next key: - * - * ``` - * { "a": 1, "b": 2 } - * ^ - * ``` - * - * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to - * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may - * fail to match some keys with escapes (\u, \n, etc.). - */ - simdjson_warn_unused simdjson_inline simdjson_result find_field_unordered_raw(const std::string_view key) noexcept; +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_COMPILE_TIME_ACCESSORS_H */ +/* amalgamation skipped (editor-only): */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - /** @} */ +// Arguably, we should just check SIMDJSON_STATIC_REFLECTION since it +// is unlikely that we will have reflection support without concepts support. +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION - /** - * @addtogroup array Array iteration - * Methods to iterate over array elements. These methods generally *assume* the value is actually - * an object; the caller is responsible for keeping track of that fact. - * @{ - */ +#include +#include +#include - /** - * Check for an opening [ and start an array iteration. - * - * @returns Whether the array had any elements (returns false for empty). - * @error INCORRECT_TYPE If there is no [. - */ - simdjson_warn_unused simdjson_inline simdjson_result start_array() noexcept; - /** - * Check for an opening [ and start an array iteration while at the root. - * - * @returns Whether the array had any elements (returns false for empty). - * @error INCORRECT_TYPE If there is no [. - * @error TAPE_ERROR if there is no matching ] at end of document - */ - simdjson_warn_unused simdjson_inline simdjson_result start_root_array() noexcept; - /** - * Checks whether an array could be started from the root. May be called by start_root_array. - * - * @returns SUCCESS if it is possible to safely start an array from the root (document level). - * @error INCORRECT_TYPE If there is no [. - * @error TAPE_ERROR if there is no matching ] at end of document - */ - simdjson_warn_unused simdjson_inline error_code check_root_array() noexcept; - /** - * Start an array iteration, after the user has already checked and moved past the [. - * - * Does not move the iterator unless the array is empty ([]). - * - * @returns Whether the array had any elements (returns false for empty). - * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that - * invalidates the document. - */ - simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; - /** - * Start an array iteration from the root, after the user has already checked and moved past the [. - * - * Does not move the iterator unless the array is empty ([]). - * - * @returns Whether the array had any elements (returns false for empty). - * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that - * invalidates the document. - */ - simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; +namespace simdjson { +namespace arm64 { +namespace ondemand { +/*** + * JSONPath implementation for compile-time access + * RFC 9535 JSONPath: Query Expressions for JSON, https://www.rfc-editor.org/rfc/rfc9535 + */ +namespace json_path { - /** - * Moves to the next element in an array. - * - * Looks for , and ]. If ] is found, the array is finished and the iterator advances past it. - * Otherwise, it advances to the next value. - * - * @return Whether there is another element in the array. - * @error TAPE_ERROR If there is a comma missing between elements. - */ - simdjson_warn_unused simdjson_inline simdjson_result has_next_element() noexcept; +// Note: value type must be fully defined before this header is included +// This is ensured by including this in amalgamated.h after value-inl.h - /** - * Get a child value iterator. - */ - simdjson_warn_unused simdjson_inline value_iterator child() const noexcept; +using ::simdjson::arm64::ondemand::value; - /** @} */ +// Path step types +enum class step_type { + field, // .field_name or ["field_name"] + array_index // [index] +}; - /** - * @defgroup scalar Scalar values - * @addtogroup scalar - * @{ - */ +// Represents a single step in a JSON path +template +struct path_step { + step_type type; + char key[N]; // Field name (empty for array indices) + std::size_t index; // Array index (0 for field access) - simdjson_warn_unused simdjson_inline simdjson_result get_string(bool allow_replacement) noexcept; - template - simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement) noexcept; - simdjson_warn_unused simdjson_inline simdjson_result get_wobbly_string() noexcept; - simdjson_warn_unused simdjson_inline simdjson_result get_raw_json_string() noexcept; - simdjson_warn_unused simdjson_inline simdjson_result get_uint64() noexcept; - simdjson_warn_unused simdjson_inline simdjson_result get_uint64_in_string() noexcept; - simdjson_warn_unused simdjson_inline simdjson_result get_int64() noexcept; - simdjson_warn_unused simdjson_inline simdjson_result get_int64_in_string() noexcept; - simdjson_warn_unused simdjson_inline simdjson_result get_double() noexcept; - simdjson_warn_unused simdjson_inline simdjson_result get_double_in_string() noexcept; - simdjson_warn_unused simdjson_inline simdjson_result get_bool() noexcept; - simdjson_warn_unused simdjson_inline simdjson_result is_null() noexcept; - simdjson_warn_unused simdjson_inline bool is_negative() noexcept; - simdjson_warn_unused simdjson_inline simdjson_result is_integer() noexcept; - simdjson_warn_unused simdjson_inline simdjson_result get_number_type() noexcept; - simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + constexpr path_step(step_type t, const char (&k)[N], std::size_t idx = 0) + : type(t), index(idx) { + for (std::size_t i = 0; i < N; ++i) { + key[i] = k[i]; + } + } - simdjson_warn_unused simdjson_inline simdjson_result get_root_string(bool check_trailing, bool allow_replacement) noexcept; - template - simdjson_warn_unused simdjson_inline error_code get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept; - simdjson_warn_unused simdjson_inline simdjson_result get_root_wobbly_string(bool check_trailing) noexcept; - simdjson_warn_unused simdjson_inline simdjson_result get_root_raw_json_string(bool check_trailing) noexcept; - simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64(bool check_trailing) noexcept; - simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64_in_string(bool check_trailing) noexcept; - simdjson_warn_unused simdjson_inline simdjson_result get_root_int64(bool check_trailing) noexcept; - simdjson_warn_unused simdjson_inline simdjson_result get_root_int64_in_string(bool check_trailing) noexcept; - simdjson_warn_unused simdjson_inline simdjson_result get_root_double(bool check_trailing) noexcept; - simdjson_warn_unused simdjson_inline simdjson_result get_root_double_in_string(bool check_trailing) noexcept; - simdjson_warn_unused simdjson_inline simdjson_result get_root_bool(bool check_trailing) noexcept; - simdjson_warn_unused simdjson_inline bool is_root_negative() noexcept; - simdjson_warn_unused simdjson_inline simdjson_result is_root_integer(bool check_trailing) noexcept; - simdjson_warn_unused simdjson_inline simdjson_result get_root_number_type(bool check_trailing) noexcept; - simdjson_warn_unused simdjson_inline simdjson_result get_root_number(bool check_trailing) noexcept; - simdjson_warn_unused simdjson_inline simdjson_result is_root_null(bool check_trailing) noexcept; + constexpr std::string_view key_view() const { + return {key, N - 1}; + } +}; - simdjson_inline error_code error() const noexcept; - simdjson_inline uint8_t *&string_buf_loc() noexcept; - simdjson_inline const json_iterator &json_iter() const noexcept; - simdjson_inline json_iterator &json_iter() noexcept; +// Helper to create field step +template +consteval auto make_field_step(const char (&name)[N]) { + return path_step(step_type::field, name, 0); +} - simdjson_inline void assert_is_valid() const noexcept; - simdjson_inline bool is_valid() const noexcept; +// Helper to create array index step +consteval auto make_index_step(std::size_t idx) { + return path_step<1>(step_type::array_index, "", idx); +} - /** @} */ -protected: - /** - * Restarts an array iteration. - * @returns Whether the array has any elements (returns false for empty). - */ - simdjson_inline simdjson_result reset_array() noexcept; - /** - * Restarts an object iteration. - * @returns Whether the object has any fields (returns false for empty). - */ - simdjson_inline simdjson_result reset_object() noexcept; - /** - * move_at_start(): moves us so that we are pointing at the beginning of - * the container. It updates the index so that at_start() is true and it - * syncs the depth. The user can then create a new container instance. - * - * Usage: used with value::count_elements(). - **/ - simdjson_inline void move_at_start() noexcept; +// Parse state for compile-time JSON path parsing +struct parse_result { + bool success; + std::size_t pos; + std::string_view error_msg; +}; - /** - * move_at_container_start(): moves us so that we are pointing at the beginning of - * the container so that assert_at_container_start() passes. - * - * Usage: used with reset_array() and reset_object(). - **/ - simdjson_inline void move_at_container_start() noexcept; - /* Useful for debugging and logging purposes. */ - inline std::string to_string() const noexcept; - simdjson_inline value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept; +// Compile-time JSON path parser +// Supports subset: .field, ["field"], [index], nested combinations +template +struct json_path_parser { + static constexpr std::string_view path_str = Path.view(); - simdjson_inline simdjson_result parse_null(const uint8_t *json) const noexcept; - simdjson_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; - simdjson_inline const uint8_t *peek_start() const noexcept; - simdjson_inline uint32_t peek_start_length() const noexcept; - simdjson_inline uint32_t peek_root_length() const noexcept; + // Skip leading $ if present + static consteval std::size_t skip_root() { + if (!path_str.empty() && path_str[0] == '$') { + return 1; + } + return 0; + } - /** - * The general idea of the advance_... methods and the peek_* methods - * is that you first peek and check that you have desired type. If you do, - * and only if you do, then you advance. - * - * We used to unconditionally advance. But this made reasoning about our - * current state difficult. - * Suppose you always advance. Look at the 'value' matching the key - * "shadowable" in the following example... - * - * ({"globals":{"a":{"shadowable":[}}}}) - * - * If the user thinks it is a Boolean and asks for it, then we check the '[', - * decide it is not a Boolean, but still move into the next character ('}'). Now - * we are left pointing at '}' right after a '['. And we have not yet reported - * an error, only that we do not have a Boolean. - * - * If, instead, you just stand your ground until it is content that you know, then - * you will only even move beyond the '[' if the user tells you that you have an - * array. So you will be at the '}' character inside the array and, hopefully, you - * will then catch the error because an array cannot start with '}', but the code - * processing Boolean values does not know this. - * - * So the contract is: first call 'peek_...' and then call 'advance_...' only - * if you have determined that it is a type you can handle. - * - * Unfortunately, it makes the code more verbose, longer and maybe more error prone. - */ + // Count the number of steps in the path at compile time + static consteval std::size_t count_steps() { + std::size_t count = 0; + std::size_t i = skip_root(); - simdjson_inline void advance_scalar(const char *type) noexcept; - simdjson_inline void advance_root_scalar(const char *type) noexcept; - simdjson_inline void advance_non_root_scalar(const char *type) noexcept; + while (i < path_str.size()) { + if (path_str[i] == '.') { + // Field access: .field + ++i; + if (i >= path_str.size()) break; - simdjson_inline const uint8_t *peek_scalar(const char *type) noexcept; - simdjson_inline const uint8_t *peek_root_scalar(const char *type) noexcept; - simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; + // Skip field name + while (i < path_str.size() && path_str[i] != '.' && path_str[i] != '[') { + ++i; + } + ++count; + } else if (path_str[i] == '[') { + // Array or bracket notation + ++i; + if (i >= path_str.size()) break; + + if (path_str[i] == '"' || path_str[i] == '\'') { + // Field access: ["field"] or ['field'] + char quote = path_str[i]; + ++i; + while (i < path_str.size() && path_str[i] != quote) { + ++i; + } + if (i < path_str.size()) ++i; // skip closing quote + if (i < path_str.size() && path_str[i] == ']') ++i; + } else { + // Array index: [0], [123] + while (i < path_str.size() && path_str[i] != ']') { + ++i; + } + if (i < path_str.size()) ++i; // skip ] + } + ++count; + } else { + ++i; + } + } + return count; + } - simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; - simdjson_inline error_code end_container() noexcept; + // Parse a field name at compile time + static consteval std::size_t parse_field_name(std::size_t start, char* out, std::size_t max_len) { + std::size_t len = 0; + std::size_t i = start; - /** - * Advance to a place expecting a value (increasing depth). - * + while (i < path_str.size() && path_str[i] != '.' && path_str[i] != '[' && len < max_len - 1) { + out[len++] = path_str[i++]; + } + out[len] = '\0'; + return i; + } + + // Parse an array index at compile time + static consteval std::pair parse_array_index(std::size_t start) { + std::size_t index = 0; + std::size_t i = start; + + while (i < path_str.size() && path_str[i] >= '0' && path_str[i] <= '9') { + index = index * 10 + (path_str[i] - '0'); + ++i; + } + + return {i, index}; + } +}; + +// Compile-time path accessor generator +template +struct path_accessor { + using value = ::simdjson::arm64::ondemand::value; + + static constexpr auto parser = json_path_parser(); + static constexpr std::size_t num_steps = parser.count_steps(); + static constexpr std::string_view path_view = Path.view(); + + // Compile-time accessor generation + // If T is a struct, validates the path at compile time + // If T is void, skips validation + template + static inline simdjson_result access(DocOrValue& doc_or_val) noexcept { + // Validate path at compile time if T is a struct + if constexpr (std::is_class_v) { + constexpr bool path_valid = validate_path(); + static_assert(path_valid, "JSON path does not match struct definition"); + } + + // Parse the path at compile time to build access steps + return access_impl(doc_or_val.get_value()); + } + + // Extract value at path directly into target with compile-time type validation + // Example: std::string name; path_accessor::extract_field(doc, name); + template + static inline error_code extract_field(DocOrValue& doc_or_val, FieldType& target) noexcept { + static_assert(std::is_class_v, "extract_field requires T to be a struct type for validation"); + + // Validate path exists in struct definition + constexpr bool path_valid = validate_path(); + static_assert(path_valid, "JSON path does not match struct definition"); + + // Get the type at the end of the path + constexpr auto final_type = get_final_type(); + + // Verify target type matches the field type + static_assert(final_type == ^^FieldType, "Target type does not match the field type at the path"); + + // All validation done at compile time - just navigate and extract + auto json_value = access_impl(doc_or_val.get_value()); + if (json_value.error()) return json_value.error(); + + return json_value.get(target); + } + +private: + // Get the final type by walking the path through the struct type + template + static consteval std::enable_if_t, std::meta::info> get_final_type() { + auto current_type = ^^T; + std::size_t i = parser.skip_root(); + + while (i < path_view.size()) { + if (path_view[i] == '.') { + // .field syntax + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != '.' && path_view[i] != '[') { + ++i; + } + + std::string_view field_name = path_view.substr(field_start, i - field_start); + + auto members = std::meta::nonstatic_data_members_of( + current_type, std::meta::access_context::unchecked() + ); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + break; + } + } + + } else if (path_view[i] == '[') { + ++i; + if (i >= path_view.size()) break; + + if (path_view[i] == '"' || path_view[i] == '\'') { + // ["field"] syntax + char quote = path_view[i]; + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != quote) { + ++i; + } + + std::string_view field_name = path_view.substr(field_start, i - field_start); + if (i < path_view.size()) ++i; // skip quote + if (i < path_view.size() && path_view[i] == ']') ++i; + + auto members = std::meta::nonstatic_data_members_of( + current_type, std::meta::access_context::unchecked() + ); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + break; + } + } + + } else { + // [index] syntax - extract element type + while (i < path_view.size() && path_view[i] >= '0' && path_view[i] <= '9') { + ++i; + } + if (i < path_view.size() && path_view[i] == ']') ++i; + + current_type = get_element_type_reflected(current_type); + } + } else { + ++i; + } + } + + return current_type; + } + +private: + // Walk path and extract directly into final field using compile-time reflection + template + static inline error_code extract_with_reflection(simdjson_result current, TargetType& target_ref) noexcept { + if (current.error()) return current.error(); + + // Base case: end of path - extract into target + if constexpr (PathPos >= path_view.size()) { + return current.get(target_ref); + } + // Field access: .field_name + else if constexpr (path_view[PathPos] == '.') { + constexpr auto field_info = parse_next_field(PathPos); + constexpr std::string_view field_name = std::get<0>(field_info); + constexpr std::size_t next_pos = std::get<1>(field_info); + + constexpr auto member_info = find_member_by_name(CurrentType, field_name); + static_assert(member_info != ^^void, "Field not found in struct"); + + constexpr auto member_type = std::meta::type_of(member_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + auto obj = obj_result.value_unsafe(); + auto field_value = obj.find_field_unordered(field_name); + + if constexpr (next_pos >= path_view.size()) { + return field_value.get(target_ref); + } else { + return extract_with_reflection(field_value, target_ref); + } + } + // Bracket notation: [index] or ["field"] + else if constexpr (path_view[PathPos] == '[') { + constexpr auto bracket_info = parse_bracket(PathPos); + constexpr bool is_field = std::get<0>(bracket_info); + constexpr std::size_t next_pos = std::get<2>(bracket_info); + + if constexpr (is_field) { + constexpr std::string_view field_name = std::get<1>(bracket_info); + constexpr auto member_info = find_member_by_name(CurrentType, field_name); + static_assert(member_info != ^^void, "Field not found in struct"); + constexpr auto member_type = std::meta::type_of(member_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + auto obj = obj_result.value_unsafe(); + auto field_value = obj.find_field_unordered(field_name); + + if constexpr (next_pos >= path_view.size()) { + return field_value.get(target_ref); + } else { + return extract_with_reflection(field_value, target_ref); + } + } else { + constexpr std::size_t index = std::get<3>(bracket_info); + constexpr auto elem_type = get_element_type_reflected(CurrentType); + static_assert(elem_type != ^^void, "Could not determine array element type"); + + auto arr_result = current.get_array(); + if (arr_result.error()) return arr_result.error(); + auto arr = arr_result.value_unsafe(); + auto elem_value = arr.at(index); + + if constexpr (next_pos >= path_view.size()) { + return elem_value.get(target_ref); + } else { + return extract_with_reflection(elem_value, target_ref); + } + } + } + // Skip unexpected characters and continue + else { + return extract_with_reflection(current, target_ref); + } + } + + // Find member by name in reflected type + static consteval std::meta::info find_member_by_name(std::meta::info type_refl, std::string_view name) { + auto members = std::meta::nonstatic_data_members_of(type_refl, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::identifier_of(mem) == name) { + return mem; + } + } + } + + // Generate compile-time accessor code by walking the path + template + static inline simdjson_result access_impl(simdjson_result current) noexcept { + if (current.error()) return current; + + if constexpr (PathPos >= path_view.size()) { + return current; + } else if constexpr (path_view[PathPos] == '.') { + constexpr auto field_info = parse_next_field(PathPos); + constexpr std::string_view field_name = std::get<0>(field_info); + constexpr std::size_t next_pos = std::get<1>(field_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + + auto obj = obj_result.value_unsafe(); + auto next_value = obj.find_field_unordered(field_name); + + return access_impl(next_value); + + } else if constexpr (path_view[PathPos] == '[') { + constexpr auto bracket_info = parse_bracket(PathPos); + constexpr bool is_field = std::get<0>(bracket_info); + constexpr std::size_t next_pos = std::get<2>(bracket_info); + + if constexpr (is_field) { + constexpr std::string_view field_name = std::get<1>(bracket_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + + auto obj = obj_result.value_unsafe(); + auto next_value = obj.find_field_unordered(field_name); + + return access_impl(next_value); + + } else { + constexpr std::size_t index = std::get<3>(bracket_info); + + auto arr_result = current.get_array(); + if (arr_result.error()) return arr_result.error(); + + auto arr = arr_result.value_unsafe(); + auto next_value = arr.at(index); + + return access_impl(next_value); + } + } else { + return access_impl(current); + } + } + + // Parse next field name + static consteval auto parse_next_field(std::size_t start) { + std::size_t i = start + 1; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != '.' && path_view[i] != '[') { + ++i; + } + std::string_view field_name = path_view.substr(field_start, i - field_start); + return std::make_tuple(field_name, i); + } + + // Parse bracket notation: returns (is_field, field_name, next_pos, index) + static consteval auto parse_bracket(std::size_t start) { + std::size_t i = start + 1; // skip '[' + + if (i < path_view.size() && (path_view[i] == '"' || path_view[i] == '\'')) { + // Field access + char quote = path_view[i]; + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != quote) { + ++i; + } + std::string_view field_name = path_view.substr(field_start, i - field_start); + if (i < path_view.size()) ++i; // skip closing quote + if (i < path_view.size() && path_view[i] == ']') ++i; + + return std::make_tuple(true, field_name, i, std::size_t(0)); + } else { + // Array index + std::size_t index = 0; + while (i < path_view.size() && path_view[i] >= '0' && path_view[i] <= '9') { + index = index * 10 + (path_view[i] - '0'); + ++i; + } + if (i < path_view.size() && path_view[i] == ']') ++i; + + return std::make_tuple(false, std::string_view{}, i, index); + } + } + +public: + // Check if reflected type is array-like (C-style array or indexable container) + // Uses reflection to test: 1) std::meta::is_array_type() for C arrays + // 2) std::meta::substitute() to test concepts::indexable_container concept + static consteval bool is_array_like_reflected(std::meta::info type_reflection) { + if (std::meta::is_array_type(type_reflection)) { + return true; + } + + if (std::meta::can_substitute(^^concepts::indexable_container_v, {type_reflection})) { + return std::meta::extract(std::meta::substitute(^^concepts::indexable_container_v, {type_reflection})); + } + return false; + } + + // Extract element type from reflected array or container + // For C arrays: uses std::meta::remove_extent() + // For containers: finds value_type member using std::meta::members_of() + static consteval std::meta::info get_element_type_reflected(std::meta::info type_reflection) { + if (std::meta::is_array_type(type_reflection)) { + return std::meta::remove_extent(type_reflection); + } + + auto members = std::meta::members_of(type_reflection, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::is_type(mem)) { + auto name = std::meta::identifier_of(mem); + if (name == "value_type") { + return mem; + } + } + } + return ^^void; + } + +private: + // Check if type has member with given name + template + static consteval bool has_member(std::string_view member_name) { + constexpr auto members = std::meta::nonstatic_data_members_of(^^Type, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::identifier_of(mem) == member_name) { + return true; + } + } + return false; + } + + // Get type of member by name + template + static consteval auto get_member_type(std::string_view member_name) { + constexpr auto members = std::meta::nonstatic_data_members_of(^^Type, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::identifier_of(mem) == member_name) { + return std::meta::type_of(mem); + } + } + return ^^void; + } + + // Check if non-reflected type is array-like + template + static consteval bool is_container_type() { + using BaseType = std::remove_cvref_t; + if constexpr (requires { typename BaseType::value_type; }) { + return true; + } + if constexpr (std::is_array_v) { + return true; + } + return false; + } + + // Extract element type from non-reflected container + template + using extract_element_type = std::conditional_t< + requires { typename std::remove_cvref_t::value_type; }, + typename std::remove_cvref_t::value_type, + std::conditional_t< + std::is_array_v>, + std::remove_extent_t>, + void + > + >; + + // Validate path matches struct definition + static consteval bool validate_path() { + if constexpr (!std::is_class_v) { + return true; + } + + auto current_type = ^^T; + std::size_t i = parser.skip_root(); + + while (i < path_view.size()) { + if (path_view[i] == '.') { + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != '.' && path_view[i] != '[') { + ++i; + } + + std::string_view field_name = path_view.substr(field_start, i - field_start); + + bool found = false; + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + found = true; + break; + } + } + + if (!found) { + return false; + } + + } else if (path_view[i] == '[') { + ++i; + if (i >= path_view.size()) return false; + + if (path_view[i] == '"' || path_view[i] == '\'') { + char quote = path_view[i]; + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != quote) { + ++i; + } + + std::string_view field_name = path_view.substr(field_start, i - field_start); + if (i < path_view.size()) ++i; + if (i < path_view.size() && path_view[i] == ']') ++i; + + bool found = false; + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + found = true; + break; + } + } + + if (!found) { + return false; + } + + } else { + while (i < path_view.size() && path_view[i] >= '0' && path_view[i] <= '9') { + ++i; + } + + if (i < path_view.size() && path_view[i] == ']') ++i; + + if (!is_array_like_reflected(current_type)) { + return false; + } + + auto new_type = get_element_type_reflected(current_type); + + if (new_type == ^^void) { + return false; + } + + current_type = new_type; + } + } else { + ++i; + } + } + + return true; + } +}; + +// Compile-time path accessor with validation +template +inline simdjson_result<::simdjson::arm64::ondemand::value> at_path_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = path_accessor; + return accessor::access(doc_or_val); +} + +// Overload without type parameter (no validation) +template +inline simdjson_result<::simdjson::arm64::ondemand::value> at_path_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = path_accessor; + return accessor::access(doc_or_val); +} + +// ============================================================================ +// JSON Pointer Compile-Time Support (RFC 6901) +// ============================================================================ + +// JSON Pointer parser: /field/0/nested (slash-separated) +template +struct json_pointer_parser { + static constexpr std::string_view pointer_str = Pointer.view(); + + // Unescape token: ~0 -> ~, ~1 -> / + static consteval void unescape_token(std::string_view src, char* dest, std::size_t& out_len) { + out_len = 0; + for (std::size_t i = 0; i < src.size(); ++i) { + if (src[i] == '~' && i + 1 < src.size()) { + if (src[i + 1] == '0') { + dest[out_len++] = '~'; + ++i; + } else if (src[i + 1] == '1') { + dest[out_len++] = '/'; + ++i; + } else { + dest[out_len++] = src[i]; + } + } else { + dest[out_len++] = src[i]; + } + } + } + + // Check if token is numeric + static consteval bool is_numeric(std::string_view token) { + if (token.empty()) return false; + if (token[0] == '0' && token.size() > 1) return false; + for (char c : token) { + if (c < '0' || c > '9') return false; + } + return true; + } + + // Parse numeric token to index + static consteval std::size_t parse_index(std::string_view token) { + std::size_t result = 0; + for (char c : token) { + result = result * 10 + (c - '0'); + } + return result; + } + + // Count tokens in pointer + static consteval std::size_t count_tokens() { + if (pointer_str.empty() || pointer_str == "/") return 0; + + std::size_t count = 0; + std::size_t pos = pointer_str[0] == '/' ? 1 : 0; + + while (pos < pointer_str.size()) { + ++count; + std::size_t next_slash = pointer_str.find('/', pos); + if (next_slash == std::string_view::npos) break; + pos = next_slash + 1; + } + + return count; + } + + // Get Nth token + static consteval std::string_view get_token(std::size_t token_index) { + std::size_t pos = pointer_str[0] == '/' ? 1 : 0; + std::size_t current_token = 0; + + while (current_token < token_index) { + std::size_t next_slash = pointer_str.find('/', pos); + pos = next_slash + 1; + ++current_token; + } + + std::size_t token_end = pointer_str.find('/', pos); + if (token_end == std::string_view::npos) token_end = pointer_str.size(); + + return pointer_str.substr(pos, token_end - pos); + } +}; + +// JSON Pointer accessor +template +struct pointer_accessor { + using parser = json_pointer_parser; + static constexpr std::string_view pointer_view = Pointer.view(); + static constexpr std::size_t token_count = parser::count_tokens(); + + // Validate pointer against struct definition + static consteval bool validate_pointer() { + if constexpr (!std::is_class_v) { + return true; + } + + auto current_type = ^^T; + std::size_t pos = pointer_view[0] == '/' ? 1 : 0; + + while (pos < pointer_view.size()) { + // Extract token up to next / + std::size_t token_end = pointer_view.find('/', pos); + if (token_end == std::string_view::npos) token_end = pointer_view.size(); + + std::string_view token = pointer_view.substr(pos, token_end - pos); + + if (parser::is_numeric(token)) { + if (!path_accessor::is_array_like_reflected(current_type)) { + return false; + } + current_type = path_accessor::get_element_type_reflected(current_type); + } else { + bool found = false; + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == token) { + current_type = std::meta::type_of(mem); + found = true; + break; + } + } + + if (!found) return false; + } + + pos = token_end + 1; + } + + return true; + } + + // Recursive accessor + template + static inline simdjson_result access_impl(simdjson_result current) noexcept { + if constexpr (TokenIndex >= token_count) { + return current; + } else { + constexpr std::string_view token = parser::get_token(TokenIndex); + + if constexpr (parser::is_numeric(token)) { + constexpr std::size_t index = parser::parse_index(token); + auto arr = current.get_array().value_unsafe(); + auto next_value = arr.at(index); + return access_impl(next_value); + } else { + auto obj = current.get_object().value_unsafe(); + auto next_value = obj.find_field_unordered(token); + return access_impl(next_value); + } + } + } + + // Access JSON value at pointer + template + static inline simdjson_result access(DocOrValue& doc_or_val) noexcept { + if constexpr (std::is_class_v) { + constexpr bool pointer_valid = validate_pointer(); + static_assert(pointer_valid, "JSON Pointer does not match struct definition"); + } + + if (pointer_view.empty() || pointer_view == "/") { + if constexpr (requires { doc_or_val.get_value(); }) { + return doc_or_val.get_value(); + } else { + return doc_or_val; + } + } + + simdjson_result current = doc_or_val.get_value(); + return access_impl<0>(current); + } + + // Extract value at pointer directly into target with type validation + template + static inline error_code extract_field(DocOrValue& doc_or_val, FieldType& target) noexcept { + static_assert(std::is_class_v, "extract_field requires T to be a struct type for validation"); + + constexpr bool pointer_valid = validate_pointer(); + static_assert(pointer_valid, "JSON Pointer does not match struct definition"); + + constexpr auto final_type = get_final_type(); + static_assert(final_type == ^^FieldType, "Target type does not match the field type at the pointer"); + + simdjson_result current_value = doc_or_val.get_value(); + auto json_value = access_impl<0>(current_value); + if (json_value.error()) return json_value.error(); + + return json_value.get(target); + } + +private: + // Get final type by walking pointer through struct + template + static consteval std::enable_if_t, std::meta::info> get_final_type() { + auto current_type = ^^T; + std::size_t pos = pointer_view[0] == '/' ? 1 : 0; + + while (pos < pointer_view.size()) { + std::size_t token_end = pointer_view.find('/', pos); + if (token_end == std::string_view::npos) token_end = pointer_view.size(); + + std::string_view token = pointer_view.substr(pos, token_end - pos); + + if (parser::is_numeric(token)) { + current_type = path_accessor::get_element_type_reflected(current_type); + } else { + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == token) { + current_type = std::meta::type_of(mem); + break; + } + } + } + + pos = token_end + 1; + } + + return current_type; + } +}; + +// Compile-time JSON Pointer accessor with validation +template +inline simdjson_result<::simdjson::arm64::ondemand::value> at_pointer_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = pointer_accessor; + return accessor::access(doc_or_val); +} + +// Overload without type parameter (no validation) +template +inline simdjson_result<::simdjson::arm64::ondemand::value> at_pointer_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = pointer_accessor; + return accessor::access(doc_or_val); +} + +} // namespace json_path +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION +#endif // SIMDJSON_GENERIC_ONDEMAND_COMPILE_TIME_ACCESSORS_H + +/* end file simdjson/generic/ondemand/compile_time_accessors.h for arm64 */ + +/* end file simdjson/generic/ondemand/amalgamated.h for arm64 */ +/* including simdjson/arm64/end.h: #include "simdjson/arm64/end.h" */ +/* begin file simdjson/arm64/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "arm64" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/arm64/end.h */ + +#endif // SIMDJSON_ARM64_ONDEMAND_H +/* end file simdjson/arm64/ondemand.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(fallback) +/* including simdjson/fallback/ondemand.h: #include "simdjson/fallback/ondemand.h" */ +/* begin file simdjson/fallback/ondemand.h */ +#ifndef SIMDJSON_FALLBACK_ONDEMAND_H +#define SIMDJSON_FALLBACK_ONDEMAND_H + +/* including simdjson/fallback/begin.h: #include "simdjson/fallback/begin.h" */ +/* begin file simdjson/fallback/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "fallback" */ +#define SIMDJSON_IMPLEMENTATION fallback +/* including simdjson/fallback/base.h: #include "simdjson/fallback/base.h" */ +/* begin file simdjson/fallback/base.h */ +#ifndef SIMDJSON_FALLBACK_BASE_H +#define SIMDJSON_FALLBACK_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Fallback implementation (runs on any machine). + */ +namespace fallback { + +class implementation; + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_BASE_H +/* end file simdjson/fallback/base.h */ +/* including simdjson/fallback/bitmanipulation.h: #include "simdjson/fallback/bitmanipulation.h" */ +/* begin file simdjson/fallback/bitmanipulation.h */ +#ifndef SIMDJSON_FALLBACK_BITMANIPULATION_H +#define SIMDJSON_FALLBACK_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace { + +#if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64) +static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) { + unsigned long x0 = (unsigned long)x, top, bottom; + _BitScanForward(&top, (unsigned long)(x >> 32)); + _BitScanForward(&bottom, x0); + *ret = x0 ? bottom : 32 + top; + return x != 0; +} +static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) { + unsigned long x1 = (unsigned long)(x >> 32), top, bottom; + _BitScanReverse(&top, x1); + _BitScanReverse(&bottom, (unsigned long)x); + *ret = x1 ? top + 32 : bottom; + return x != 0; +} +#endif + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#ifdef _MSC_VER + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// _MSC_VER +} + +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_BITMANIPULATION_H +/* end file simdjson/fallback/bitmanipulation.h */ +/* including simdjson/fallback/stringparsing_defs.h: #include "simdjson/fallback/stringparsing_defs.h" */ +/* begin file simdjson/fallback/stringparsing_defs.h */ +#ifndef SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H +#define SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace { + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 1; + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return c == '"'; } + simdjson_inline bool has_backslash() { return c == '\\'; } + simdjson_inline int quote_index() { return c == '"' ? 0 : 1; } + simdjson_inline int backslash_index() { return c == '\\' ? 0 : 1; } + + uint8_t c; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // store to dest unconditionally - we can overwrite the bits we don't like later + dst[0] = src[0]; + return { src[0] }; +} + + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 1; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits; } + simdjson_inline int escape_index() { return 0; } + + bool escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + dst[0] = src[0]; + return { (src[0] == '\\') || (src[0] == '"') || (src[0] < 32) }; +} + +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H +/* end file simdjson/fallback/stringparsing_defs.h */ +/* including simdjson/fallback/numberparsing_defs.h: #include "simdjson/fallback/numberparsing_defs.h" */ +/* begin file simdjson/fallback/numberparsing_defs.h */ +#ifndef SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H +#define SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +#ifdef JSON_TEST_NUMBERS // for unit testing +void found_invalid_number(const uint8_t *buf); +void found_integer(int64_t result, const uint8_t *buf); +void found_unsigned_integer(uint64_t result, const uint8_t *buf); +void found_float(double result, const uint8_t *buf); +#endif + +namespace simdjson { +namespace fallback { +namespace numberparsing { + +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const char *chars) { + uint64_t val; + memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + return parse_eight_digits_unrolled(reinterpret_cast(chars)); +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#if SIMDJSON_IS_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // SIMDJSON_IS_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace fallback +} // namespace simdjson + +#ifndef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_IS_BIG_ENDIAN +#define SIMDJSON_SWAR_NUMBER_PARSING 0 +#else +#define SIMDJSON_SWAR_NUMBER_PARSING 1 +#endif +#endif + +#endif // SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H +/* end file simdjson/fallback/numberparsing_defs.h */ +/* end file simdjson/fallback/begin.h */ +/* including simdjson/generic/ondemand/amalgamated.h for fallback: #include "simdjson/generic/ondemand/amalgamated.h" */ +/* begin file simdjson/generic/ondemand/amalgamated.h for fallback */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H) +#error simdjson/generic/ondemand/dependencies.h must be included before simdjson/generic/ondemand/amalgamated.h! +#endif + +// Stuff other things depend on +/* including simdjson/generic/ondemand/base.h for fallback: #include "simdjson/generic/ondemand/base.h" */ +/* begin file simdjson/generic/ondemand/base.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +/** + * A fast, simple, DOM-like interface that parses JSON as you use it. + * + * Designed for maximum speed and a lower memory profile. + */ +namespace ondemand { + +/** Represents the depth of a JSON value (number of nested arrays/objects). */ +using depth_t = int32_t; + +/** @copydoc simdjson::fallback::number_type */ +using number_type = simdjson::fallback::number_type; + +/** @private Position in the JSON buffer indexes */ +using token_position = const uint32_t *; + +class array; +class array_iterator; +class document; +class document_reference; +class document_stream; +class field; +class json_iterator; +enum class json_type; +struct number; +class object; +class object_iterator; +class parser; +class raw_json_string; +class token_iterator; +class value; +class value_iterator; + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_BASE_H +/* end file simdjson/generic/ondemand/base.h for fallback */ +/* including simdjson/generic/ondemand/deserialize.h for fallback: #include "simdjson/generic/ondemand/deserialize.h" */ +/* begin file simdjson/generic/ondemand/deserialize.h for fallback */ +#if SIMDJSON_SUPPORTS_CONCEPTS + +#ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_ONDEMAND_DESERIALIZE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +struct deserialize_tag; + +/// These types are deserializable in a built-in way +template struct is_builtin_deserializable : std::false_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; +template <> struct is_builtin_deserializable : std::true_type {}; + +template +concept is_builtin_deserializable_v = is_builtin_deserializable::value; + +template +concept custom_deserializable = tag_invocable; + +template +concept deserializable = custom_deserializable || is_builtin_deserializable_v || concepts::optional_type; + +template +concept nothrow_custom_deserializable = nothrow_tag_invocable; + +// built-in types are noexcept and if an error happens, the value simply gets ignored and the error is returned. +template +concept nothrow_deserializable = nothrow_custom_deserializable || is_builtin_deserializable_v; + +/// Deserialize Tag +inline constexpr struct deserialize_tag { + using array_type = fallback::ondemand::array; + using object_type = fallback::ondemand::object; + using value_type = fallback::ondemand::value; + using document_type = fallback::ondemand::document; + using document_reference_type = fallback::ondemand::document_reference; + + // Customization Point for array + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(array_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for object + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(object_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for value + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for document + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for document reference + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + +} deserialize{}; + +} // namespace simdjson + +#endif // SIMDJSON_ONDEMAND_DESERIALIZE_H +#endif // SIMDJSON_SUPPORTS_CONCEPTS + +/* end file simdjson/generic/ondemand/deserialize.h for fallback */ +/* including simdjson/generic/ondemand/value_iterator.h for fallback: #include "simdjson/generic/ondemand/value_iterator.h" */ +/* begin file simdjson/generic/ondemand/value_iterator.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * Iterates through a single JSON value at a particular depth. + * + * Does not keep track of the type of value: provides methods for objects, arrays and scalars and expects + * the caller to call the right ones. + * + * @private This is not intended for external use. + */ +class value_iterator { +protected: + /** The underlying JSON iterator */ + json_iterator *_json_iter{}; + /** The depth of this value */ + depth_t _depth{}; + /** + * The starting token index for this value + */ + token_position _start_position{}; + +public: + simdjson_inline value_iterator() noexcept = default; + + /** + * Denote that we're starting a document. + */ + simdjson_inline void start_document() noexcept; + + /** + * Skips a non-iterated or partially-iterated JSON value, whether it is a scalar, array or object. + * + * Optimized for scalars. + */ + simdjson_warn_unused simdjson_inline error_code skip_child() noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is at the start of the value + */ + simdjson_inline bool at_start() const noexcept; + + /** + * Tell whether the value is open--if the value has not been used, or the array/object is still open. + */ + simdjson_inline bool is_open() const noexcept; + + /** + * Tell whether the value is at an object's first field (just after the {). + */ + simdjson_inline bool at_first_field() const noexcept; + + /** + * Abandon all iteration. + */ + simdjson_inline void abandon() noexcept; + + /** + * Get the child value as a value_iterator. + */ + simdjson_inline value_iterator child_value() const noexcept; + + /** + * Get the depth of this value. + */ + simdjson_inline int32_t depth() const noexcept; + + /** + * Get the JSON type of this value. + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() const noexcept; + + /** + * @addtogroup object Object iteration + * + * Methods to iterate and find object fields. These methods generally *assume* the value is + * actually an object; the caller is responsible for keeping track of that fact. + * + * @{ + */ + + /** + * Start an object iteration. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + */ + simdjson_warn_unused simdjson_inline simdjson_result start_object() noexcept; + /** + * Start an object iteration from the root. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_object() noexcept; + /** + * Checks whether an object could be started from the root. May be called by start_root_object. + * + * @returns SUCCESS if it is possible to safely start an object from the root (document level). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_object() noexcept; + /** + * Start an object iteration after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. + */ + simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; + /** + * Start an object iteration from the root, after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; + + /** + * Moves to the next field in an object. + * + * Looks for , and }. If } is found, the object is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return whether there is another field in the object. + * @error TAPE_ERROR If there is a comma missing between fields. + * @error TAPE_ERROR If there is a comma, but not enough tokens remaining to have a key, :, and value. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_field() noexcept; + + /** + * Get the current field's key. + */ + simdjson_warn_unused simdjson_inline simdjson_result field_key() noexcept; + + /** + * Pass the : in the field and move to its value. + */ + simdjson_warn_unused simdjson_inline error_code field_value() noexcept; + + /** + * Find the next field with the given key. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline error_code find_field(const std::string_view key) noexcept; + + /** + * Find the next field with the given key, *without* unescaping. This assumes object order: it + * will not find the field if it was already passed when looking for some *other* field. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_raw(const std::string_view key) noexcept; + + /** + * Find the field with the given key without regard to order, and *without* unescaping. + * + * This is an unordered object lookup: if the field is not found initially, it will cycle around and scan from the beginning. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_unordered_raw(const std::string_view key) noexcept; + + /** @} */ + + /** + * @addtogroup array Array iteration + * Methods to iterate over array elements. These methods generally *assume* the value is actually + * an object; the caller is responsible for keeping track of that fact. + * @{ + */ + + /** + * Check for an opening [ and start an array iteration. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + */ + simdjson_warn_unused simdjson_inline simdjson_result start_array() noexcept; + /** + * Check for an opening [ and start an array iteration while at the root. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_array() noexcept; + /** + * Checks whether an array could be started from the root. May be called by start_root_array. + * + * @returns SUCCESS if it is possible to safely start an array from the root (document level). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_array() noexcept; + /** + * Start an array iteration, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. + */ + simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; + /** + * Start an array iteration from the root, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; + + /** + * Moves to the next element in an array. + * + * Looks for , and ]. If ] is found, the array is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return Whether there is another element in the array. + * @error TAPE_ERROR If there is a comma missing between elements. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_element() noexcept; + + /** + * Get a child value iterator. + */ + simdjson_warn_unused simdjson_inline value_iterator child() const noexcept; + + /** @} */ + + /** + * @defgroup scalar Scalar values + * @addtogroup scalar + * @{ + */ + + simdjson_warn_unused simdjson_inline simdjson_result get_string(bool allow_replacement) noexcept; + template + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_bool() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_null() noexcept; + simdjson_warn_unused simdjson_inline bool is_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_integer() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + simdjson_warn_unused simdjson_inline simdjson_result get_root_string(bool check_trailing, bool allow_replacement) noexcept; + template + simdjson_warn_unused simdjson_inline error_code get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_wobbly_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_raw_json_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_bool(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline bool is_root_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_integer(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number_type(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_null(bool check_trailing) noexcept; + + simdjson_warn_unused simdjson_inline error_code error() const noexcept; + simdjson_inline uint8_t *&string_buf_loc() noexcept; + simdjson_inline const json_iterator &json_iter() const noexcept; + simdjson_inline json_iterator &json_iter() noexcept; + + simdjson_inline void assert_is_valid() const noexcept; + simdjson_inline bool is_valid() const noexcept; + + /** @} */ +protected: + /** + * Restarts an array iteration. + * @returns Whether the array has any elements (returns false for empty). + */ + simdjson_inline simdjson_result reset_array() noexcept; + /** + * Restarts an object iteration. + * @returns Whether the object has any fields (returns false for empty). + */ + simdjson_inline simdjson_result reset_object() noexcept; + /** + * move_at_start(): moves us so that we are pointing at the beginning of + * the container. It updates the index so that at_start() is true and it + * syncs the depth. The user can then create a new container instance. + * + * Usage: used with value::count_elements(). + **/ + simdjson_inline void move_at_start() noexcept; + + /** + * move_at_container_start(): moves us so that we are pointing at the beginning of + * the container so that assert_at_container_start() passes. + * + * Usage: used with reset_array() and reset_object(). + **/ + simdjson_inline void move_at_container_start() noexcept; + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + simdjson_inline value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept; + + simdjson_inline simdjson_result parse_null(const uint8_t *json) const noexcept; + simdjson_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; + simdjson_inline const uint8_t *peek_start() const noexcept; + simdjson_inline uint32_t peek_start_length() const noexcept; + simdjson_inline uint32_t peek_root_length() const noexcept; + + /** + * The general idea of the advance_... methods and the peek_* methods + * is that you first peek and check that you have desired type. If you do, + * and only if you do, then you advance. + * + * We used to unconditionally advance. But this made reasoning about our + * current state difficult. + * Suppose you always advance. Look at the 'value' matching the key + * "shadowable" in the following example... + * + * ({"globals":{"a":{"shadowable":[}}}}) + * + * If the user thinks it is a Boolean and asks for it, then we check the '[', + * decide it is not a Boolean, but still move into the next character ('}'). Now + * we are left pointing at '}' right after a '['. And we have not yet reported + * an error, only that we do not have a Boolean. + * + * If, instead, you just stand your ground until it is content that you know, then + * you will only even move beyond the '[' if the user tells you that you have an + * array. So you will be at the '}' character inside the array and, hopefully, you + * will then catch the error because an array cannot start with '}', but the code + * processing Boolean values does not know this. + * + * So the contract is: first call 'peek_...' and then call 'advance_...' only + * if you have determined that it is a type you can handle. + * + * Unfortunately, it makes the code more verbose, longer and maybe more error prone. + */ + + simdjson_inline void advance_scalar(const char *type) noexcept; + simdjson_inline void advance_root_scalar(const char *type) noexcept; + simdjson_inline void advance_non_root_scalar(const char *type) noexcept; + + simdjson_inline const uint8_t *peek_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_root_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; + + + simdjson_warn_unused simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container() noexcept; + + /** + * Advance to a place expecting a value (increasing depth). + * * @return The current token (the one left behind). * @error TAPE_ERROR If the document ended early. */ simdjson_inline simdjson_result advance_to_value() noexcept; - simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; - simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; + simdjson_warn_unused simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; + simdjson_warn_unused simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; simdjson_inline bool is_at_start() const noexcept; /** @@ -43568,7 +49325,7 @@ class value_iterator { /** @copydoc error_code json_iterator::end_position() const noexcept; */ simdjson_inline token_position end_position() const noexcept; /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ - simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + simdjson_warn_unused simdjson_inline error_code report_error(error_code error, const char *message) noexcept; friend class document; friend class object; @@ -43605,6 +49362,7 @@ struct simdjson_result : public fallback::im /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include @@ -43633,13 +49391,14 @@ class value { * * You may use get_double(), get_bool(), get_uint64(), get_int64(), * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * When SIMDJSON_SUPPORTS_CONCEPTS is set, custom types are also supported. * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ template simdjson_inline simdjson_result get() -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -43656,22 +49415,38 @@ class value { * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * If the macro SIMDJSON_SUPPORTS_CONCEPTS is set, then custom types are also supported. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template - simdjson_inline error_code get(T &out) -#if SIMDJSON_SUPPORTS_DESERIALIZATION + simdjson_warn_unused simdjson_inline error_code get(T &out) +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { - #if SIMDJSON_SUPPORTS_DESERIALIZATION + #if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); + } else if constexpr (concepts::optional_type) { + using value_type = typename std::remove_cvref_t::value_type; + + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullopt + return SUCCESS; + } + + if (!out) { + out.emplace(); + } + return get(out.value()); } else { static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " "And you do not seem to have added support for it. Indeed, we have that " @@ -43681,7 +49456,7 @@ class value { static_cast(out); // to get rid of unused errors return UNINITIALIZED; } -#else // SIMDJSON_SUPPORTS_DESERIALIZATION +#else // SIMDJSON_SUPPORTS_CONCEPTS // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -43799,7 +49574,7 @@ class value { * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. */ template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; /** * Cast this JSON value to a "wobbly" string. @@ -43976,12 +49751,14 @@ class value { */ simdjson_inline simdjson_result at(size_t index) noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -44010,7 +49787,8 @@ class value { * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the default because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -44253,6 +50031,14 @@ class value { */ simdjson_inline simdjson_result at_path(std::string_view at_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * Supports wildcard character (*) for arrays or ".*" for objects. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern + */ + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; protected: /** @@ -44320,7 +50106,7 @@ struct simdjson_result : public fallback::implementat simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -44349,12 +50135,14 @@ struct simdjson_result : public fallback::implementat simdjson_inline simdjson_result end() & noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -44381,7 +50169,8 @@ struct simdjson_result : public fallback::implementat * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the defaul because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -44401,7 +50190,22 @@ struct simdjson_result : public fallback::implementat simdjson_result operator[](int) noexcept = delete; /** - * Get the type of this JSON value. + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * Given a valid JSON document, the answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just @@ -44425,6 +50229,7 @@ struct simdjson_result : public fallback::implementat simdjson_inline simdjson_result current_depth() const noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; }; } // namespace simdjson @@ -44895,14 +50700,14 @@ class json_iterator { * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ - simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + simdjson_warn_unused simdjson_inline error_code report_error(error_code error, const char *message) noexcept; /** * Log error, but don't stop iteration. * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ - simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; + simdjson_warn_unused simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; /** * Take an input in json containing max_len characters and attempt to copy it over to tmpbuf, a buffer with @@ -44922,7 +50727,7 @@ class json_iterator { simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; - simdjson_inline error_code consume_character(char c) noexcept; + simdjson_warn_unused simdjson_inline error_code consume_character(char c) noexcept; #if SIMDJSON_DEVELOPMENT_CHECKS simdjson_inline token_position start_position(depth_t depth) const noexcept; simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; @@ -45013,6 +50818,7 @@ namespace ondemand { * The type of a JSON value. */ enum class json_type { + unknown=0, // Start at 1 to catch uninitialized / default values more easily array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) object, ///< A JSON object ( { "a": 1, "b" 2, ... } ) @@ -45219,6 +51025,12 @@ class raw_json_string { */ simdjson_inline const char * raw() const noexcept; + /** + * Get the character at index i. This is unchecked. + * [0] when the string is of length 0 returns the final quote ("). + */ + simdjson_inline char operator[](size_t i) const noexcept; + /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done) on target.size() characters, @@ -45358,10 +51170,10 @@ struct simdjson_result : public fallback::i simdjson_inline ~simdjson_result() noexcept = default; ///< @private simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline char operator[](size_t) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape(fallback::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(fallback::ondemand::json_iterator &iter) const noexcept; }; - } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H @@ -45377,6 +51189,7 @@ struct simdjson_result : public fallback::i /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include +#include namespace simdjson { namespace fallback { @@ -45495,7 +51308,9 @@ class parser { simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; - /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + /** @overload simdjson_result iterate(padded_string_view json) & noexcept + The string instance might be have its capacity extended. Note that this can still + result in AddressSanitizer: container-overflow in some cases. */ simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; @@ -45583,6 +51398,11 @@ class parser { * Setting batch_size to excessively large or excessively small values may impact negatively the * performance. * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what @@ -45590,10 +51410,10 @@ class parser { * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * - * ### Threads + * This is checked automatically with all iterate_many function calls, except for the two + * that take pointers (const char* or const uint8_t*). * - * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the - * hood to do some lookahead. + * ### Threads * * ### Parser Capacity * @@ -45619,14 +51439,16 @@ class parser { */ inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(padded_string_view json, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) + the string might be automatically padded with up to SIMDJSON_PADDING whitespace characters */ + inline simdjson_result iterate_many(std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe - /** @private We do not want to allow implicit conversion from C string to std::string. */ simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; @@ -45728,13 +51550,39 @@ class parser { bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; #endif + /** + * Get a unique parser instance corresponding to the current thread. + * This instance can be safely used within the current thread, but it should + * not be passed to other threads. + * + * A parser should only be used for one document at a time. + * + * Our simdjson::from functions use this parser instance. + * + * You can free the related parser by calling release_parser(). + */ + static simdjson_inline simdjson_warn_unused ondemand::parser& get_parser(); + /** + * Release the parser instance initialized by get_parser() and all the + * associated resources (memory). Returns true if a parser instance + * was released. + */ + static simdjson_inline bool release_parser(); + private: + friend bool release_parser(); + friend ondemand::parser& get_parser(); + /** Get the thread-local parser instance, allocates it if needed */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_parser_instance(); + /** Get the thread-local parser instance, it might be null */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_threadlocal_parser_if_exists(); /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; std::unique_ptr string_buf{}; + #if SIMDJSON_DEVELOPMENT_CHECKS std::unique_ptr start_positions{}; #endif @@ -45762,6 +51610,314 @@ struct simdjson_result : public fallback::implementa #endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H /* end file simdjson/generic/ondemand/parser.h for fallback */ +// JSON builder - needed for extract_into functionality +/* including simdjson/generic/ondemand/json_string_builder.h for fallback: #include "simdjson/generic/ondemand/json_string_builder.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder.h for fallback */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + + +#if SIMDJSON_SUPPORTS_CONCEPTS + +namespace fallback { +namespace builder { + class string_builder; +}} + +template +struct has_custom_serialization : std::false_type {}; + +inline constexpr struct serialize_tag { + template + constexpr void operator()(fallback::builder::string_builder& b, T&& obj) const{ + return tag_invoke(*this, b, std::forward(obj)); + } + + +} serialize{}; +template +struct has_custom_serialization(), std::declval())) +>> : std::true_type {}; + +template +constexpr bool require_custom_serialization = has_custom_serialization::value; +#else +struct has_custom_serialization : std::false_type {}; +#endif // SIMDJSON_SUPPORTS_CONCEPTS + +namespace fallback { +namespace builder { +/** + * A builder for JSON strings representing documents. This is a low-level + * builder that is not meant to be used directly by end-users. Though it + * supports atomic types (Booleans, strings), it does not support composed + * types (arrays and objects). + * + * Ultimately, this class can support kernel-specific optimizations. E.g., + * it may make use of SIMD instructions to escape strings faster. + */ +class string_builder { +public: + simdjson_inline string_builder(size_t initial_capacity = DEFAULT_INITIAL_CAPACITY); + + static constexpr size_t DEFAULT_INITIAL_CAPACITY = 1024; + + /** + * Append number (includes Booleans). Booleans are mapped to the strings + * false and true. Numbers are converted to strings abiding by the JSON standard. + * Floating-point numbers are converted to the shortest string that 'correctly' + * represents the number. + */ + template::value>::type> + simdjson_inline void append(number_type v) noexcept; + + /** + * Append character c. + */ + simdjson_inline void append(char c) noexcept; + + /** + * Append the string 'null'. + */ + simdjson_inline void append_null() noexcept; + + /** + * Clear the content. + */ + simdjson_inline void clear() noexcept; + + /** + * Append the std::string_view, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append(std::string_view input) noexcept; + + /** + * Append the std::string_view surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(std::string_view input) noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + template + simdjson_inline void escape_and_append_with_quotes() noexcept; +#endif + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(char input) noexcept; + + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(const char* input) noexcept; + + /** + * Append the C string directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *c) noexcept; + + /** + * Append "{" to the buffer. + */ + simdjson_inline void start_object() noexcept; + + /** + * Append "}" to the buffer. + */ + simdjson_inline void end_object() noexcept; + + /** + * Append "[" to the buffer. + */ + simdjson_inline void start_array() noexcept; + + /** + * Append "]" to the buffer. + */ + simdjson_inline void end_array() noexcept; + + /** + * Append "," to the buffer. + */ + simdjson_inline void append_comma() noexcept; + + /** + * Append ":" to the buffer. + */ + simdjson_inline void append_colon() noexcept; + + /** + * Append a key-value pair to the buffer. + * The key is escaped and surrounded by double quotes. + * The value is escaped if it is a string. + */ + template + simdjson_inline void append_key_value(key_type key, value_type value) noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + template + simdjson_inline void append_key_value(value_type value) noexcept; + + // Support for optional types (std::optional, etc.) + template + requires(!require_custom_serialization) + simdjson_inline void append(const T &opt); + + template + requires(require_custom_serialization) + simdjson_inline void append(T &&val); + + // Support for string-like types + template + requires(std::is_convertible::value || + std::is_same::value ) + simdjson_inline void append(const T &value); +#endif +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS + // Support for range-based appending (std::ranges::view, etc.) + template +requires (!std::is_convertible::value && !require_custom_serialization) + simdjson_inline void append(const R &range) noexcept; +#endif + /** + * Append the std::string_view directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(std::string_view input) noexcept; + + /** + * Append len characters from str. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *str, size_t len) noexcept; +#if SIMDJSON_EXCEPTIONS + /** + * Creates an std::string from the written JSON buffer. + * Throws if memory allocation failed + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string() const noexcept(false); + + /** + * Creates an std::string_view from the written JSON buffer. + * Throws if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string_view() const noexcept(false) simdjson_lifetime_bound; +#endif + + /** + * Returns a view on the written JSON buffer. Returns an error + * if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result view() const noexcept; + + /** + * Appends the null character to the buffer and returns + * a pointer to the beginning of the written JSON buffer. + * Returns an error if memory allocation failed. + * The result is null-terminated. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result c_str() noexcept; + + /** + * Return true if the content is valid UTF-8. + */ + simdjson_inline bool validate_unicode() const noexcept; + + /** + * Returns the current size of the written JSON buffer. + * If an error occurred, returns 0. + */ + simdjson_inline size_t size() const noexcept; + +private: + /** + * Returns true if we can write at least upcoming_bytes bytes. + * The underlying buffer is reallocated if needed. It is designed + * to be called before writing to the buffer. It should be fast. + */ + simdjson_inline bool capacity_check(size_t upcoming_bytes); + + /** + * Grow the buffer to at least desired_capacity bytes. + * If the allocation fails, is_valid is set to false. We expect + * that this function would not be repeatedly called. + */ + simdjson_inline void grow_buffer(size_t desired_capacity); + + /** + * We use this helper function to make sure that is_valid is kept consistent. + */ + simdjson_inline void set_valid(bool valid) noexcept; + + std::unique_ptr buffer{}; + size_t position{0}; + size_t capacity{0}; + bool is_valid{true}; +}; + + + +} +} + + +#if !SIMDJSON_STATIC_REFLECTION +// fallback implementation until we have static reflection +template +simdjson_warn_unused simdjson_result to_json(const Z &z, size_t initial_capacity = simdjson::fallback::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + simdjson::fallback::builder::string_builder b(initial_capacity); + b.append(z); + std::string_view s; + auto e = b.view().get(s); + if(e) { return e; } + return std::string(s); +} +template +simdjson_warn_unused simdjson_error to_json(const Z &z, std::string &s, size_t initial_capacity = simdjson::fallback::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + simdjson::fallback::builder::string_builder b(initial_capacity); + b.append(z); + std::string_view sv; + auto e = b.view().get(sv); + if(e) { return e; } + s.assign(sv.data(), sv.size()); + return simdjson::SUCCESS; +} +#endif + +#if SIMDJSON_SUPPORTS_CONCEPTS +#endif // SIMDJSON_SUPPORTS_CONCEPTS + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_STRING_BUILDER_H +/* end file simdjson/generic/ondemand/json_string_builder.h for fallback */ + // All other declarations /* including simdjson/generic/ondemand/array.h for fallback: #include "simdjson/generic/ondemand/array.h" */ /* begin file simdjson/generic/ondemand/array.h for fallback */ @@ -45772,6 +51928,7 @@ struct simdjson_result : public fallback::implementa /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { @@ -45874,7 +52031,7 @@ class array { * JSONPath queries that trivially convertible to JSON Pointer queries: key * names and array indices. * - * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * https://www.rfc-editor.org/rfc/rfc9535 (RFC 9535) * * @return The value associated with the given JSONPath expression, or: * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails @@ -45884,6 +52041,15 @@ class array { */ inline simdjson_result at_path(std::string_view json_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * Supports wildcard patterns like "[*]" to match all array elements. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern + */ + inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; + /** * Consumes the array and returns a string_view instance corresponding to the * array as represented in JSON. It points inside the original document. @@ -45898,11 +52064,42 @@ class array { * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) noexcept; + +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this array as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON array is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_warn_unused simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the array, no matter where you are right now. */ - simdjson_inline error_code consume() noexcept; + simdjson_warn_unused simdjson_inline error_code consume() noexcept; /** * Begin array iteration. @@ -45975,8 +52172,30 @@ struct simdjson_result : public fallback::implementat simdjson_inline simdjson_result at(size_t index) noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_warn_unused simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -46021,7 +52240,8 @@ class array_iterator { * * Part of the std::iterator interface. */ - simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result + operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. /** * Check if we are at the end of the JSON. * @@ -46045,6 +52265,11 @@ class array_iterator { */ simdjson_inline array_iterator &operator++() noexcept; + /** + * Check if the array is at the end. + */ + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; + private: value_iterator iter{}; @@ -46063,7 +52288,6 @@ namespace simdjson { template<> struct simdjson_result : public fallback::implementation_simdjson_result_base { -public: simdjson_inline simdjson_result(fallback::ondemand::array_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -46076,6 +52300,8 @@ struct simdjson_result : public fallback::im simdjson_inline bool operator==(const simdjson_result &) const noexcept; simdjson_inline bool operator!=(const simdjson_result &) const noexcept; simdjson_inline simdjson_result &operator++() noexcept; + + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; }; } // namespace simdjson @@ -46092,6 +52318,7 @@ struct simdjson_result : public fallback::im /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ @@ -46203,7 +52430,7 @@ class document { * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. */ template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; /** * Cast this JSON value to a string. * @@ -46269,7 +52496,7 @@ class document { */ template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -46292,7 +52519,7 @@ class document { */ template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -46310,18 +52537,18 @@ class document { * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. - * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns INCORRECT_TYPE If the JSON value is of the given type. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template - simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION + simdjson_warn_unused simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { @@ -46333,7 +52560,7 @@ class document { static_cast(out); // to get rid of unused errors return UNINITIALIZED; } -#else // SIMDJSON_SUPPORTS_DESERIALIZATION +#else // SIMDJSON_SUPPORTS_CONCEPTS // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -46343,7 +52570,7 @@ class document { " You may also add support for custom types, see our documentation."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS } /** @overload template error_code get(T &out) & noexcept */ @@ -46408,7 +52635,7 @@ class document { * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a raw_json_string. * @@ -46417,7 +52644,7 @@ class document { * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a bool. * @@ -46488,12 +52715,14 @@ class document { simdjson_inline simdjson_result end() & noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -46532,7 +52761,8 @@ class document { * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the default because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -46567,11 +52797,27 @@ class document { * E.g., you must still call "is_null()" to check that a value is null even if * "type()" returns json_type::null. * + * The answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. + * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). * - * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + * Prior to simdjson 4.0, this function would return an error given a bad token. + * Starting with simdjson 4.0, it will return simdjson::ondemand::json_type::unknown. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. */ simdjson_inline simdjson_result type() noexcept; @@ -46771,7 +53017,7 @@ class document { * JSONPath queries that trivially convertible to JSON Pointer queries: key * names and array indices. * - * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * https://www.rfc-editor.org/rfc/rfc9535 (RFC 9535) * * Key values are matched exactly, without unescaping or Unicode normalization. * We do a byte-by-byte comparison. E.g. @@ -46789,17 +53035,64 @@ class document { */ simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * + * Supports wildcard patterns like "$.array[*]" or "$.object.*" to match multiple elements. + * + * This method materializes all matching values into a vector. + * The document will be consumed after this call. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern, or: + * - INVALID_JSON_POINTER if the JSONPath cannot be parsed + * - NO_SUCH_FIELD if a field does not exist + * - INDEX_OUT_OF_BOUNDS if an array index is out of bounds + * - INCORRECT_TYPE if path traversal encounters wrong type + */ + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; + /** * Consumes the document and returns a string_view instance corresponding to the * document as represented in JSON. It points inside the original byte array containing * the JSON document. */ simdjson_inline simdjson_result raw_json() noexcept; + +#if SIMDJSON_STATIC_REFLECTION + /** + * Extract only specific fields from the JSON object into a struct. + * + * This allows selective deserialization of only the fields you need, + * potentially improving performance by skipping unwanted fields. + * + * Example: + * ```cpp + * struct Car { + * std::string make; + * std::string model; + * int year; + * double price; + * }; + * + * Car car; + * doc.extract_into<"make", "model">(car); + * // Only 'make' and 'model' fields are extracted from JSON + * ``` + * + * @tparam FieldNames Compile-time string literals specifying which fields to extract + * @param out The output struct to populate with selected fields + * @returns SUCCESS on success, or an error code if a required field is missing or has wrong type + */ + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION protected: /** * Consumes the document. */ - simdjson_inline error_code consume() noexcept; + simdjson_warn_unused simdjson_inline error_code consume() noexcept; simdjson_inline document(ondemand::json_iterator &&iter) noexcept; simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; @@ -46852,7 +53145,7 @@ class document_reference { simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -46861,7 +53154,7 @@ class document_reference { simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -46874,7 +53167,7 @@ class document_reference { } template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -46896,14 +53189,14 @@ class document_reference { * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template - simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION + simdjson_warn_unused simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { @@ -46915,7 +53208,7 @@ class document_reference { static_cast(out); // to get rid of unused errors return UNINITIALIZED; } -#else // SIMDJSON_SUPPORTS_DESERIALIZATION +#else // SIMDJSON_SUPPORTS_CONCEPTS // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -46925,12 +53218,17 @@ class document_reference { " You may also add support for custom types, see our documentation."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS } /** @overload template error_code get(T &out) & noexcept */ template simdjson_inline error_code get(T &out) && noexcept; simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_STATIC_REFLECTION + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION simdjson_inline operator document&() const noexcept; #if SIMDJSON_EXCEPTIONS template @@ -46971,6 +53269,7 @@ class document_reference { simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; private: document *doc{nullptr}; @@ -46999,7 +53298,7 @@ struct simdjson_result : public fallback::implemen simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -47012,6 +53311,9 @@ struct simdjson_result : public fallback::implemen template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS + + using fallback::implementation_simdjson_result_base::operator*; + using fallback::implementation_simdjson_result_base::operator->; template ::value == false>::type> explicit simdjson_inline operator T() noexcept(false); simdjson_inline operator fallback::ondemand::array() & noexcept(false); @@ -47051,6 +53353,12 @@ struct simdjson_result : public fallback::implemen simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; +#if SIMDJSON_STATIC_REFLECTION + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION }; @@ -47077,7 +53385,7 @@ struct simdjson_result : public fallback simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -47128,6 +53436,12 @@ struct simdjson_result : public fallback simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; +#if SIMDJSON_STATIC_REFLECTION + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION }; @@ -47220,7 +53534,7 @@ class document_stream { /** * Construct an uninitialized document_stream. * - * ```c++ + * ```cpp * document_stream docs; * auto error = parser.iterate_many(json).get(docs); * ``` @@ -47270,6 +53584,7 @@ class document_stream { * Default constructor. */ simdjson_inline iterator() noexcept; + simdjson_inline iterator(const iterator &other) noexcept = default; /** * Get the current document (or error). */ @@ -47283,6 +53598,7 @@ class document_stream { * @param other the end iterator to compare to. */ simdjson_inline bool operator!=(const iterator &other) const noexcept; + simdjson_inline bool operator==(const iterator &other) const noexcept; /** * @private * @@ -47326,6 +53642,11 @@ class document_stream { */ inline error_code error() const noexcept; + /** + * Returns whether the iterator is at the end. + */ + inline bool at_end() const noexcept; + private: simdjson_inline iterator(document_stream *s, bool finished) noexcept; /** The document_stream we're iterating through. */ @@ -47337,6 +53658,7 @@ class document_stream { friend class document_stream; friend class json_iterator; }; + using iterator = document_stream::iterator; /** * Start iterating the documents in the stream. @@ -47600,6 +53922,10 @@ struct simdjson_result : public fallback::implementat /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION && SIMDJSON_SUPPORTS_CONCEPTS */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_string_builder.h" // for constevalutil::fixed_string */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { @@ -47621,12 +53947,14 @@ class object { simdjson_inline simdjson_result begin() noexcept; simdjson_inline simdjson_result end() noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -47669,7 +53997,8 @@ class object { * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the default because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -47752,6 +54081,15 @@ class object { */ inline simdjson_result at_path(std::string_view json_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * Supports wildcard patterns like ".*" to match all object fields. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern + */ + inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; + /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you @@ -47797,11 +54135,71 @@ class object { */ simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this object as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON object is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_warn_unused simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } + +#if SIMDJSON_STATIC_REFLECTION + /** + * Extract only specific fields from the JSON object into a struct. + * + * This allows selective deserialization of only the fields you need, + * potentially improving performance by skipping unwanted fields. + * + * Example: + * ```cpp + * struct Car { + * std::string make; + * std::string model; + * int year; + * double price; + * }; + * + * Car car; + * object.extract_into<"make", "model">(car); + * // Only 'make' and 'model' fields are extracted from JSON + * ``` + * + * @tparam FieldNames Compile-time string literals specifying which fields to extract + * @param out The output struct to populate with selected fields + * @returns SUCCESS on success, or an error code if a required field is missing or has wrong type + */ + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the object, no matter where you are right now. */ - simdjson_inline error_code consume() noexcept; + simdjson_warn_unused simdjson_inline error_code consume() noexcept; static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; @@ -47840,12 +54238,43 @@ struct simdjson_result : public fallback::implementa simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; - + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; inline simdjson_result reset() noexcept; inline simdjson_result is_empty() noexcept; inline simdjson_result count_fields() & noexcept; inline simdjson_result raw_json() noexcept; + #if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_warn_unused simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#if SIMDJSON_STATIC_REFLECTION + // TODO: move this code into object-inl.h + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) noexcept { + if (error()) { return error(); } + return first.extract_into(out); + } +#endif // SIMDJSON_STATIC_REFLECTION +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -47974,6 +54403,20 @@ inline simdjson_result to_json_string(simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); + +#if SIMDJSON_STATIC_REFLECTION +/** + * Create a JSON string from any user-defined type using static reflection. + * Only available when SIMDJSON_STATIC_REFLECTION is enabled. + */ +template + requires(!std::same_as && + !std::same_as && + !std::same_as && + !std::same_as) +inline std::string to_json_string(const T& obj); +#endif + } // namespace simdjson /** @@ -48045,28 +54488,30 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result #include +#if SIMDJSON_STATIC_REFLECTION +#include +// #include // for std::define_static_string - header not available yet +#endif namespace simdjson { -template -constexpr bool require_custom_serialization = false; ////////////////////////////// // Number deserialization ////////////////////////////// template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { using limits = std::numeric_limits; @@ -48080,7 +54525,6 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { } template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { double x; SIMDJSON_TRY(val.get_double().get(x)); @@ -48089,7 +54533,6 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { } template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { using limits = std::numeric_limits; @@ -48102,8 +54545,23 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { return SUCCESS; } +////////////////////////////// +// String deserialization +////////////////////////////// + +// just a character! +error_code tag_invoke(deserialize_tag, auto &val, char &out) noexcept { + std::string_view x; + SIMDJSON_TRY(val.get_string().get(x)); + if(x.size() != 1) { + return INCORRECT_TYPE; + } + out = x[0]; + return SUCCESS; +} + +// any string-like type (can be constructed from std::string_view) template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { std::string_view str; SIMDJSON_TRY(val.get_string().get(str)); @@ -48120,7 +54578,6 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothr * doc.get>(). */ template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { using value_type = typename std::remove_cvref_t::value_type; static_assert( @@ -48129,9 +54586,13 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { static_assert( std::is_default_constructible_v, "The specified type inside the container must default constructible."); - fallback::ondemand::array arr; - SIMDJSON_TRY(val.get_array().get(arr)); + if constexpr (std::is_same_v, fallback::ondemand::array>) { + arr = val; + } else { + SIMDJSON_TRY(val.get_array().get(arr)); + } + for (auto v : arr) { if constexpr (concepts::returns_reference) { if (auto const err = v.get().get(concepts::emplace_one(out)); @@ -48162,7 +54623,6 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { * string-keyed types. */ template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { using value_type = typename std::remove_cvref_t::mapped_type; static_assert( @@ -48188,7 +54648,45 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { return SUCCESS; } +template +error_code tag_invoke(deserialize_tag, fallback::ondemand::object &obj, T &out) noexcept { + using value_type = typename std::remove_cvref_t::mapped_type; + + out.clear(); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + + fallback::ondemand::value value_obj; + SIMDJSON_TRY(field.value().get(value_obj)); + + value_type this_value; + SIMDJSON_TRY(value_obj.get(this_value)); + out.emplace(typename T::key_type(key), std::move(this_value)); + } + return SUCCESS; +} + +template +error_code tag_invoke(deserialize_tag, fallback::ondemand::value &val, T &out) noexcept { + fallback::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + +template +error_code tag_invoke(deserialize_tag, fallback::ondemand::document &doc, T &out) noexcept { + fallback::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} +template +error_code tag_invoke(deserialize_tag, fallback::ondemand::document_reference &doc, T &out) noexcept { + fallback::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} /** @@ -48206,7 +54704,6 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { * @return status of the conversion */ template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::element_type, ValT>) { using element_type = typename std::remove_cvref_t::element_type; @@ -48231,17 +54728,17 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser /** * This CPO (Customization Point Object) will help deserialize into optional types. */ -template - requires(!require_custom_serialization) -error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::value_type, ValT>) { +template +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept(nothrow_deserializable::value_type, decltype(val)>) { using value_type = typename std::remove_cvref_t::value_type; - static_assert( - deserializable, - "The specified type inside the unique_ptr must itself be deserializable"); - static_assert( - std::is_default_constructible_v, - "The specified type inside the unique_ptr must default constructible."); + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullopt + return SUCCESS; + } if (!out) { out.emplace(); @@ -48250,10 +54747,329 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser return SUCCESS; } + +#if SIMDJSON_STATIC_REFLECTION + + +template +constexpr bool user_defined_type = (std::is_class_v +&& !std::is_same_v && !std::is_same_v && !concepts::optional_type && +!concepts::appendable_containers); + + +template + requires(user_defined_type && std::is_class_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { + fallback::ondemand::object obj; + if constexpr (std::is_same_v, fallback::ondemand::object>) { + obj = val; + } else { + SIMDJSON_TRY(val.get_object().get(obj)); + } + template for (constexpr auto mem : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + if constexpr (concepts::optional_type) { + // for optional members, it's ok if the key is missing + auto error = obj[key].get(out.[:mem:]); + if (error && error != NO_SUCH_FIELD) { + if(error == NO_SUCH_FIELD) { + out.[:mem:].reset(); + continue; + } + return error; + } + } else { + // for non-optional members, the key must be present + SIMDJSON_TRY(obj[key].get(out.[:mem:])); + } + } + }; + return simdjson::SUCCESS; +} + +// Support for enum deserialization - deserialize from string representation using expand approach from P2996R12 +template + requires(std::is_enum_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { +#if SIMDJSON_STATIC_REFLECTION + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + if (str == std::meta::identifier_of(enum_val)) { + out = [:enum_val:]; + return SUCCESS; + } + }; + + return INCORRECT_TYPE; +#else + // Fallback: deserialize as integer if reflection not available + std::underlying_type_t int_val; + SIMDJSON_TRY(val.get(int_val)); + out = static_cast(int_val); + return SUCCESS; +#endif +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::unique_ptr &out) noexcept { + if (!out) { + out = std::make_unique(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::shared_ptr &out) noexcept { + if (!out) { + out = std::make_shared(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +#endif // SIMDJSON_STATIC_REFLECTION + +//////////////////////////////////////// +// Unique pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Shared pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Explicit optional specializations +//////////////////////////////////////// + +//////////////////////////////////////// +// Explicit smart pointer specializations for string and int types +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_shared(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + int64_t temp; + SIMDJSON_TRY(val.get_int64().get(temp)); + *out = static_cast(temp); + return SUCCESS; +} + } // namespace simdjson #endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS /* end file simdjson/generic/ondemand/std_deserialize.h for fallback */ // Inline definitions @@ -48346,7 +55162,7 @@ simdjson_inline simdjson_result array::begin() noexcept { simdjson_inline simdjson_result array::end() noexcept { return array_iterator(iter); } -simdjson_inline error_code array::consume() noexcept { +simdjson_warn_unused simdjson_warn_unused simdjson_inline error_code array::consume() noexcept { auto error = iter.json_iter().skip_child(iter.depth()-1); if(error) { iter.abandon(); } return error; @@ -48431,6 +55247,67 @@ inline simdjson_result array::at_path(std::string_view json_path) noexcep return at_pointer(json_pointer); } +inline simdjson_result> array::at_path_with_wildcard(std::string_view json_path) noexcept { + std::vector result; + + auto result_pair = get_next_key_and_json_path(json_path); + std::string_view key = result_pair.first; + std::string_view remaining_path = result_pair.second; + // Wildcard case + if(key=="*"){ + for(auto element: *this){ + + if(element.error()){ + return element.error(); + } + + if(remaining_path.empty()){ + // Use value_unsafe() because we've already checked for errors above. + // The 'element' is a simdjson_result wrapper, and we need to extract + // the underlying value. value_unsafe() is safe here because error() returned false. + result.push_back(std::move(element).value_unsafe()); + + }else{ + auto nested_result = element.at_path_with_wildcard(remaining_path); + + if(nested_result.error()){ + return nested_result.error(); + } + // Same logic as above. + std::vector nested_matches = std::move(nested_result).value_unsafe(); + + result.insert(result.end(), + std::make_move_iterator(nested_matches.begin()), + std::make_move_iterator(nested_matches.end())); + } + } + return result; + }else{ + // Specific index case in which we access the element at the given index + size_t idx=0; + + for(char c:key){ + if(c < '0' || c > '9'){ + return INVALID_JSON_POINTER; + } + idx = idx*10 + (c - '0'); + } + + auto element = at(idx); + + if(element.error()){ + return element.error(); + } + + if(remaining_path.empty()){ + result.push_back(std::move(element).value_unsafe()); + return result; + }else{ + return element.at_path_with_wildcard(remaining_path); + } + } +} + simdjson_inline simdjson_result array::at(size_t index) noexcept { size_t i = 0; for (auto value : *this) { @@ -48489,6 +55366,10 @@ simdjson_inline simdjson_result simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path_with_wildcard(json_path); +} simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { if (error()) { return error(); } return first.raw_json(); @@ -48537,6 +55418,9 @@ simdjson_inline array_iterator &array_iterator::operator++() noexcept { return *this; } +simdjson_inline bool array_iterator::at_end() const noexcept { + return iter.at_end(); +} } // namespace ondemand } // namespace fallback } // namespace simdjson @@ -48573,7 +55457,9 @@ simdjson_inline simdjson_result &simdjson_re ++(first); return *this; } - +simdjson_inline bool simdjson_result::at_end() const noexcept { + return !first.iter.is_valid() || first.at_end(); +} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H @@ -48630,7 +55516,7 @@ simdjson_inline simdjson_result value::get_string(bool allow_r return iter.get_string(allow_replacement); } template -simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_warn_unused simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { return iter.get_string(receiver, allow_replacement); } simdjson_inline simdjson_result value::get_wobbly_string() noexcept { @@ -48672,15 +55558,15 @@ template<> simdjson_inline simdjson_result value::get() noexcept { retu template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } -template<> simdjson_inline error_code value::get(array& out) noexcept { return get_array().get(out); } -template<> simdjson_inline error_code value::get(object& out) noexcept { return get_object().get(out); } -template<> simdjson_inline error_code value::get(raw_json_string& out) noexcept { return get_raw_json_string().get(out); } -template<> simdjson_inline error_code value::get(std::string_view& out) noexcept { return get_string(false).get(out); } -template<> simdjson_inline error_code value::get(number& out) noexcept { return get_number().get(out); } -template<> simdjson_inline error_code value::get(double& out) noexcept { return get_double().get(out); } -template<> simdjson_inline error_code value::get(uint64_t& out) noexcept { return get_uint64().get(out); } -template<> simdjson_inline error_code value::get(int64_t& out) noexcept { return get_int64().get(out); } -template<> simdjson_inline error_code value::get(bool& out) noexcept { return get_bool().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(array& out) noexcept { return get_array().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(object& out) noexcept { return get_object().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(raw_json_string& out) noexcept { return get_raw_json_string().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(std::string_view& out) noexcept { return get_string(false).get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(number& out) noexcept { return get_number().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(double& out) noexcept { return get_double().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(uint64_t& out) noexcept { return get_uint64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(int64_t& out) noexcept { return get_int64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(bool& out) noexcept { return get_bool().get(out); } #if SIMDJSON_EXCEPTIONS template @@ -48878,6 +55764,19 @@ simdjson_inline simdjson_result value::at_path(std::string_view json_path } } +inline simdjson_result> value::at_path_with_wildcard(std::string_view json_path) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path_with_wildcard(json_path); + case json_type::object: + return (*this).get_object().at_path_with_wildcard(json_path); + default: + return INVALID_JSON_POINTER; + } +} + } // namespace ondemand } // namespace fallback } // namespace simdjson @@ -49128,6 +56027,14 @@ simdjson_inline simdjson_result simdjson_result> simdjson_result::at_path_with_wildcard( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path_with_wildcard(json_path); +} + } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H @@ -49277,7 +56184,7 @@ simdjson_inline simdjson_result document::get_string(bool allo return get_root_value_iterator().get_root_string(true, allow_replacement); } template -simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_warn_unused simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept { return get_root_value_iterator().get_root_string(receiver, true, allow_replacement); } simdjson_inline simdjson_result document::get_wobbly_string() noexcept { @@ -49303,15 +56210,15 @@ template<> simdjson_inline simdjson_result document::get() & noexcept { template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } -template<> simdjson_inline error_code document::get(array& out) & noexcept { return get_array().get(out); } -template<> simdjson_inline error_code document::get(object& out) & noexcept { return get_object().get(out); } -template<> simdjson_inline error_code document::get(raw_json_string& out) & noexcept { return get_raw_json_string().get(out); } -template<> simdjson_inline error_code document::get(std::string_view& out) & noexcept { return get_string(false).get(out); } -template<> simdjson_inline error_code document::get(double& out) & noexcept { return get_double().get(out); } -template<> simdjson_inline error_code document::get(uint64_t& out) & noexcept { return get_uint64().get(out); } -template<> simdjson_inline error_code document::get(int64_t& out) & noexcept { return get_int64().get(out); } -template<> simdjson_inline error_code document::get(bool& out) & noexcept { return get_bool().get(out); } -template<> simdjson_inline error_code document::get(value& out) & noexcept { return get_value().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(array& out) & noexcept { return get_array().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(object& out) & noexcept { return get_object().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(raw_json_string& out) & noexcept { return get_raw_json_string().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(std::string_view& out) & noexcept { return get_string(false).get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(double& out) & noexcept { return get_double().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(uint64_t& out) & noexcept { return get_uint64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(int64_t& out) & noexcept { return get_int64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(bool& out) & noexcept { return get_bool().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(value& out) & noexcept { return get_value().get(out); } template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } @@ -49331,8 +56238,8 @@ simdjson_inline document::operator object() & noexcept(false) { return get_objec simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline document::operator double() noexcept(false) { return get_double(); } -simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } -simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator std::string_view() noexcept(false) simdjson_lifetime_bound { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) simdjson_lifetime_bound { return get_raw_json_string(); } simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } simdjson_inline document::operator value() noexcept(false) { return get_value(); } @@ -49355,1160 +56262,1980 @@ simdjson_inline simdjson_result document::at(size_t index) & noexcept { auto a = get_array(); return a.at(index); } -simdjson_inline simdjson_result document::begin() & noexcept { - return get_array().begin(); +simdjson_inline simdjson_result document::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result document::end() & noexcept { + return {}; +} + +simdjson_inline simdjson_result document::find_field(std::string_view key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field(const char *key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { + return start_or_resume_object()[key]; +} + +simdjson_warn_unused simdjson_inline error_code document::consume() noexcept { + bool scalar = false; + auto error = is_scalar().get(scalar); + if(error) { return error; } + if(scalar) { + iter.return_current_and_advance(); + return SUCCESS; + } + error = iter.skip_child(0); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result document::raw_json() noexcept { + auto _iter = get_root_value_iterator(); + const uint8_t * starting_point{_iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter.unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result document::type() noexcept { + return get_root_value_iterator().type(); +} + +simdjson_inline simdjson_result document::is_scalar() noexcept { + // For more speed, we could do: + // return iter.is_single_token(); + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline simdjson_result document::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} + +simdjson_inline bool document::is_negative() noexcept { + return get_root_value_iterator().is_root_negative(); +} + +simdjson_inline simdjson_result document::is_integer() noexcept { + return get_root_value_iterator().is_root_integer(true); +} + +simdjson_inline simdjson_result document::get_number_type() noexcept { + return get_root_value_iterator().get_root_number_type(true); +} + +simdjson_inline simdjson_result document::get_number() noexcept { + return get_root_value_iterator().get_root_number(true); +} + + +simdjson_inline simdjson_result document::raw_json_token() noexcept { + auto _iter = get_root_value_iterator(); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_root_length()); +} + +simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_pointer.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result document::at_path(std::string_view json_path) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_path.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result> document::at_path_with_wildcard(std::string_view json_path) noexcept { + rewind(); // Rewind the document each time at_path_with_wildcard is called + if (json_path.empty()) { + return INVALID_JSON_POINTER; + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path_with_wildcard(json_path); + case json_type::object: + return (*this).get_object().at_path_with_wildcard(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code document::extract_into(T& out) & noexcept { + // Helper to check if a field name matches any of the requested fields + auto should_extract = [](std::string_view field_name) constexpr -> bool { + return ((FieldNames.view() == field_name) || ...); + }; + + // Iterate through all members of T using reflection + template for (constexpr auto mem : std::define_static_array( + std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + + // Only extract this field if it's in our list of requested fields + if constexpr (should_extract(key)) { + // Try to find and extract the field + if constexpr (concepts::optional_type) { + // For optional fields, it's ok if they're missing + auto field_result = find_field_unordered(key); + if (!field_result.error()) { + auto error = field_result.get(out.[:mem:]); + if (error && error != NO_SUCH_FIELD) { + return error; + } + } else if (field_result.error() != NO_SUCH_FIELD) { + return field_result.error(); + } else { + out.[:mem:].reset(); + } + } else { + // For required fields (in the requested list), fail if missing + SIMDJSON_TRY((*this)[key].get(out.[:mem:])); + } + } + } + }; + + return SUCCESS; +} + +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + fallback::ondemand::document &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base( + error + ) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first); +} +template<> simdjson_warn_unused simdjson_inline error_code simdjson_result::get(fallback::ondemand::document &out) & noexcept = delete; +template<> simdjson_warn_unused simdjson_inline error_code simdjson_result::get(fallback::ondemand::document &out) && noexcept { + if (error()) { return error(); } + out = std::forward(first); + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} + +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} + +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} + +simdjson_inline bool simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} + +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} + +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} + +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} + + +#if SIMDJSON_EXCEPTIONS +template ::value == false>::type> +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } -simdjson_inline simdjson_result document::end() & noexcept { - return {}; +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } - -simdjson_inline simdjson_result document::find_field(std::string_view key) & noexcept { - return start_or_resume_object().find_field(key); +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } -simdjson_inline simdjson_result document::find_field(const char *key) & noexcept { - return start_or_resume_object().find_field(key); +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } -simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { - return start_or_resume_object().find_field_unordered(key); +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } -simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { - return start_or_resume_object().find_field_unordered(key); +simdjson_inline simdjson_result::operator fallback::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } -simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { - return start_or_resume_object()[key]; +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } -simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { - return start_or_resume_object()[key]; +simdjson_inline simdjson_result::operator fallback::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } +#endif -simdjson_inline error_code document::consume() noexcept { - bool scalar = false; - auto error = is_scalar().get(scalar); - if(error) { return error; } - if(scalar) { - iter.return_current_and_advance(); - return SUCCESS; - } - error = iter.skip_child(0); - if(error) { iter.abandon(); } - return error; -} -simdjson_inline simdjson_result document::raw_json() noexcept { - auto _iter = get_root_value_iterator(); - const uint8_t * starting_point{_iter.peek_start()}; - auto error = consume(); - if(error) { return error; } - // After 'consume()', we could be left pointing just beyond the document, but that - // is ok because we are not going to dereference the final pointer position, we just - // use it to compute the length in bytes. - const uint8_t * final_point{iter.unsafe_pointer()}; - return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); } -simdjson_inline simdjson_result document::type() noexcept { - return get_root_value_iterator().type(); +simdjson_inline bool simdjson_result::at_end() const noexcept { + if (error()) { return error(); } + return first.at_end(); } -simdjson_inline simdjson_result document::is_scalar() noexcept { - // For more speed, we could do: - // return iter.is_single_token(); - json_type this_type; - auto error = type().get(this_type); - if(error) { return error; } - return ! ((this_type == json_type::array) || (this_type == json_type::object)); + +simdjson_inline int32_t simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); } -simdjson_inline simdjson_result document::is_string() noexcept { - json_type this_type; - auto error = type().get(this_type); - if(error) { return error; } - return (this_type == json_type::string); +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); } -simdjson_inline bool document::is_negative() noexcept { - return get_root_value_iterator().is_root_negative(); +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); } -simdjson_inline simdjson_result document::is_integer() noexcept { - return get_root_value_iterator().is_root_integer(true); +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); } -simdjson_inline simdjson_result document::get_number_type() noexcept { - return get_root_value_iterator().get_root_number_type(true); +simdjson_inline simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path_with_wildcard(json_path); } -simdjson_inline simdjson_result document::get_number() noexcept { - return get_root_value_iterator().get_root_number(true); +#if SIMDJSON_STATIC_REFLECTION +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code simdjson_result::extract_into(T& out) & noexcept { + if (error()) { return error(); } + return first.extract_into(out); } +#endif // SIMDJSON_STATIC_REFLECTION +} // namespace simdjson -simdjson_inline simdjson_result document::raw_json_token() noexcept { - auto _iter = get_root_value_iterator(); - return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_root_length()); -} -simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { - rewind(); // Rewind the document each time at_pointer is called - if (json_pointer.empty()) { - return this->get_value(); - } - json_type t; - SIMDJSON_TRY(type().get(t)); - switch (t) - { - case json_type::array: - return (*this).get_array().at_pointer(json_pointer); - case json_type::object: - return (*this).get_object().at_pointer(json_pointer); - default: - return INVALID_JSON_POINTER; - } -} +namespace simdjson { +namespace fallback { +namespace ondemand { -simdjson_inline simdjson_result document::at_path(std::string_view json_path) noexcept { - rewind(); // Rewind the document each time at_pointer is called - if (json_path.empty()) { - return this->get_value(); - } - json_type t; - SIMDJSON_TRY(type().get(t)); - switch (t) { - case json_type::array: - return (*this).get_array().at_path(json_path); - case json_type::object: - return (*this).get_object().at_path(json_path); - default: - return INVALID_JSON_POINTER; - } +simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} +simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} +simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } +simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } +simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } +/** + * The document_reference instances are used primarily/solely for streams of JSON + * documents. + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. + * + * However, for streams of JSON documents, we want to be able to start from + * "321" "321" "321" + * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() + * successfully each time. + * + * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: + * this indicates that we allow trailing content. + */ +simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } +simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } +simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } +template +simdjson_warn_unused simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } +simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } +simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } +simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } +simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } +simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_value(); } +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline document_reference::operator T() noexcept(false) { return get(); } +simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } +simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } +simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } +simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } +simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } +#endif +simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } +simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } +simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } +simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } +simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } +simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } +simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_inline simdjson_result document_reference::is_string() noexcept { return doc->is_string(); } +simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } +simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } +simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } +simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } +simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } +simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } +simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } +simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_inline simdjson_result document_reference::at_path(std::string_view json_path) noexcept { return doc->at_path(json_path); } +simdjson_inline simdjson_result> document_reference::at_path_with_wildcard(std::string_view json_path) noexcept { return doc->at_path_with_wildcard(json_path); } +simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} +simdjson_inline document_reference::operator document&() const noexcept { return *doc; } +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code document_reference::extract_into(T& out) & noexcept { + return doc->extract_into(out); } - +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION } // namespace ondemand } // namespace fallback } // namespace simdjson + + namespace simdjson { +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::document_reference value, error_code error) + noexcept : implementation_simdjson_result_base(std::forward(value), error) {} -simdjson_inline simdjson_result::simdjson_result( - fallback::ondemand::document &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ -} -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base( - error - ) -{ -} -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } -simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { if (error()) { return error(); } return first.at(index); } -simdjson_inline error_code simdjson_result::rewind() noexcept { +simdjson_inline error_code simdjson_result::rewind() noexcept { if (error()) { return error(); } first.rewind(); return SUCCESS; } -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_inline simdjson_result simdjson_result::end() & noexcept { +simdjson_inline simdjson_result simdjson_result::end() & noexcept { return {}; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { if (error()) { return error(); } return first.get_array(); } -simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { if (error()) { return error(); } return first.get_object(); } -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_warn_unused simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } -simdjson_inline simdjson_result simdjson_result::get_value() noexcept { +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { if (error()) { return error(); } return first.get_value(); } -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } - template -simdjson_inline simdjson_result simdjson_result::get() & noexcept { +simdjson_inline simdjson_result simdjson_result::get() & noexcept { if (error()) { return error(); } return first.get(); } template -simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { +simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } - return std::forward(first).get(); + return std::forward(first).get(); } -template -simdjson_inline error_code simdjson_result::get(T &out) & noexcept { +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) & noexcept { if (error()) { return error(); } return first.get(out); } -template -simdjson_inline error_code simdjson_result::get(T &out) && noexcept { - if (error()) { return error(); } - return std::forward(first).get(out); +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +template <> +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(fallback::ondemand::document_reference &out) & noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} +template <> +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(fallback::ondemand::document_reference &out) && noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline simdjson_result::operator T() noexcept(false) { + static_assert(std::is_same::value == false, "You should not call get when T is a document"); + static_assert(std::is_same::value == false, "You should not call get when T is a document"); + if (error()) { throw simdjson_error(error()); } + return first.get(); +} +simdjson_inline simdjson_result::operator fallback::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} +simdjson_inline simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path_with_wildcard(json_path); +} +#if SIMDJSON_STATIC_REFLECTION +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code simdjson_result::extract_into(T& out) & noexcept { + if (error()) { return error(); } + return first.extract_into(out); +} +#endif // SIMDJSON_STATIC_REFLECTION +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H +/* end file simdjson/generic/ondemand/document-inl.h for fallback */ +/* including simdjson/generic/ondemand/document_stream-inl.h for fallback: #include "simdjson/generic/ondemand/document_stream-inl.h" */ +/* begin file simdjson/generic/ondemand/document_stream-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace fallback { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. + std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); +} + +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); +} + +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). + if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); +} + + +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. + can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } +} + +inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); +} + +#endif // SIMDJSON_THREADS_ENABLED + +simdjson_inline document_stream::document_stream( + ondemand::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size, + bool _allow_comma_separated +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? MINIMAL_BATCH_SIZE : _batch_size}, + allow_comma_separated{_allow_comma_separated}, + error{SUCCESS} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change + #endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif } -template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; -template<> simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first); +simdjson_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + allow_comma_separated{false}, + error{UNINITIALIZED} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) + #endif +{ } -template<> simdjson_inline error_code simdjson_result::get(fallback::ondemand::document &out) & noexcept = delete; -template<> simdjson_inline error_code simdjson_result::get(fallback::ondemand::document &out) && noexcept { - if (error()) { return error(); } - out = std::forward(first); - return SUCCESS; + +simdjson_inline document_stream::~document_stream() noexcept +{ + #ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); + #endif } -simdjson_inline simdjson_result simdjson_result::type() noexcept { - if (error()) { return error(); } - return first.type(); +inline size_t document_stream::size_in_bytes() const noexcept { + return len; } -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { - if (error()) { return error(); } - return first.is_scalar(); +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; } -simdjson_inline simdjson_result simdjson_result::is_string() noexcept { - if (error()) { return error(); } - return first.is_string(); +simdjson_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { } -simdjson_inline bool simdjson_result::is_negative() noexcept { - if (error()) { return error(); } - return first.is_negative(); +simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { } -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { - if (error()) { return error(); } - return first.is_integer(); +simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { + return simdjson_result(stream->doc, stream->error); } -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { - if (error()) { return error(); } - return first.get_number_type(); +simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) + // + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. + // + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->error() is guarded against error conditions + // (it will immediately return if stream->error casts to false). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. + return *this; } -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { - if (error()) { return error(); } - return first.get_number(); +simdjson_inline bool document_stream::iterator::at_end() const noexcept { + return finished; } -#if SIMDJSON_EXCEPTIONS -template ::value == false>::type> -simdjson_inline simdjson_result::operator T() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; } -simdjson_inline simdjson_result::operator fallback::ondemand::array() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline bool document_stream::iterator::operator==(const document_stream::iterator &other) const noexcept { + return finished == other.finished; } -simdjson_inline simdjson_result::operator fallback::ondemand::object() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished. + return iterator(this, error == EMPTY); } -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); } -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->allocate(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } + doc_index = batch_start; + doc = document(json_iterator(&buf[batch_start], parser)); + doc.iter._streaming = true; + + #ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread on next batch if needed + error = stage1_thread_parser.allocate(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } + } + #endif // SIMDJSON_THREADS_ENABLED } -simdjson_inline simdjson_result::operator double() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +inline void document_stream::next() noexcept { + // We always enter at once once in an error condition. + if (error) { return; } + next_document(); + if (error) { return; } + auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); + doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; + + // Check if at end of structural indexes (i.e. at end of batch) + if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { + error = EMPTY; + // Load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + #ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } + #else + error = run_stage1(*parser, batch_start); + #endif + /** + * Whenever we move to another window, we need to update all pointers to make + * it appear as if the input buffer started at the beginning of the window. + * + * Take this input: + * + * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] + * + * Say you process the following window... + * + * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' + * + * When you do so, the json_iterator has a pointer at the beginning of the memory region + * (pointing at the beginning of '{"z"...'. + * + * When you move to the window that starts at... + * + * '[7, 10, 9] [15, 11, 12, 13] ... + * + * then it is not sufficient to just run stage 1. You also need to re-anchor the + * json_iterator so that it believes we are starting at '[7, 10, 9]...'. + * + * Under the DOM front-end, this gets done automatically because the parser owns + * the pointer the data, and when you call stage1 and then stage2 on the same + * parser, then stage2 will run on the pointer acquired by stage1. + * + * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that + * we used. But json_iterator has no callback when stage1 is called on the parser. + * In fact, I think that the parser is unaware of json_iterator. + * + * + * So we need to re-anchor the json_iterator after each call to stage 1 so that + * all of the pointers are in sync. + */ + doc.iter = json_iterator(&buf[batch_start], parser); + doc.iter._streaming = true; + /** + * End of resync. + */ + + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. + doc_index = batch_start; + } + } } -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +inline void document_stream::next_document() noexcept { + // Go to next place where depth=0 (document depth) + error = doc.iter.skip_child(0); + if (error) { return; } + // Always set depth=1 at the start of document + doc.iter._depth = 1; + // consume comma if comma separated is allowed + if (allow_comma_separated) { + error_code ignored = doc.iter.consume_character(','); + static_cast(ignored); // ignored on purpose + } + // Resets the string buffer at the beginning, thus invalidating the strings. + doc.iter._string_buf_loc = parser->string_buf.get(); + doc.iter._root = doc.iter.position(); } -simdjson_inline simdjson_result::operator fallback::ondemand::raw_json_string() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; } -simdjson_inline simdjson_result::operator bool() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { + // This code only updates the structural index in the parser, it does not update any json_iterator + // instance. + size_t remaining = len - _batch_start; + if (remaining <= batch_size) { + return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); + } else { + return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); + } } -simdjson_inline simdjson_result::operator fallback::ondemand::value() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline size_t document_stream::iterator::current_index() const noexcept { + return stream->doc_index; } -#endif +simdjson_inline std::string_view document_stream::iterator::source() const noexcept { + auto depth = stream->doc.iter.depth(); + auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); + + // If at root, process the first token to determine if scalar value + if (stream->doc.iter.at_root()) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': // Depth=1 already at start of document + break; + case '}': case ']': + depth--; + break; + default: // Scalar value document + // TODO: We could remove trailing whitespaces + // This returns a string spanning from start of value to the beginning of the next document (excluded) + { + auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; + // normally the length would be next_index - current_index() - 1, except for the last document + size_t svlen = next_index - current_index(); + const char *start = reinterpret_cast(stream->buf) + current_index(); + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { + svlen--; + } + return std::string_view(start, svlen); + } + } + cur_struct_index++; + } -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { - if (error()) { return error(); } - return first.current_location(); + while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': + depth++; + break; + case '}': case ']': + depth--; + break; + } + if (depth == 0) { break; } + cur_struct_index++; + } + + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; } -simdjson_inline bool simdjson_result::at_end() const noexcept { - if (error()) { return error(); } - return first.at_end(); +inline error_code document_stream::iterator::error() const noexcept { + return stream->error; } +#ifdef SIMDJSON_THREADS_ENABLED -simdjson_inline int32_t simdjson_result::current_depth() const noexcept { - if (error()) { return error(); } - return first.current_depth(); -} +inline void document_stream::load_from_stage1_thread() noexcept { + worker->finish(); + // Swap to the parser that was loaded up in the thread. Make sure the parser has + // enough memory to swap to, as well. + std::swap(stage1_thread_parser,*parser); + error = stage1_thread_error; + if (error) { return; } -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { - if (error()) { return error(); } - return first.raw_json_token(); + // If there's anything left, start the stage 1 thread! + if (next_batch_start() < len) { + start_stage1_thread(); + } } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); -} +inline void document_stream::start_stage1_thread() noexcept { + // we call the thread on a lambda that will update + // this->stage1_thread_error + // there is only one thread that may write to this value + // TODO this is NOT exception-safe. + this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); -simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { - if (error()) { return error(); } - return first.at_path(json_path); + worker->run(this, & this->stage1_thread_parser, _next_batch_start); } -} // namespace simdjson +#endif // SIMDJSON_THREADS_ENABLED +} // namespace ondemand +} // namespace fallback +} // namespace simdjson namespace simdjson { -namespace fallback { -namespace ondemand { -simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} -simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} -simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } -simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } -simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } -/** - * The document_reference instances are used primarily/solely for streams of JSON - * documents. - * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should - * give an error, so we check for trailing content. - * - * However, for streams of JSON documents, we want to be able to start from - * "321" "321" "321" - * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() - * successfully each time. - * - * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: - * this indicates that we allow trailing content. - */ -simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } -simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } -simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } -simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } -simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } -simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } -simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } -template -simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } -simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } -simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } -simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } -simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } -simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_array(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_object(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_raw_json_string(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_string(false); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_double(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_uint64(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_int64(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_bool(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_value(); } -#if SIMDJSON_EXCEPTIONS -template -simdjson_inline document_reference::operator T() noexcept(false) { return get(); } -simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } -simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } -simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } -simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } -simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } -simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } -simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } -simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } -simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } -#endif -simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } -simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } -simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } -simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } -simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } -simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } -simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } -simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } -simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } -simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } -simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } -simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } -simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } -simdjson_inline simdjson_result document_reference::is_string() noexcept { return doc->is_string(); } -simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } -simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } -simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } -simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } -simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } -simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } -simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } -simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } -simdjson_inline simdjson_result document_reference::at_path(std::string_view json_path) noexcept { return doc->at_path(json_path); } -simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} -simdjson_inline document_reference::operator document&() const noexcept { return *doc; } +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result::simdjson_result( + fallback::ondemand::document_stream &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} -} // namespace ondemand -} // namespace fallback -} // namespace simdjson +} +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H +/* end file simdjson/generic/ondemand/document_stream-inl.h for fallback */ +/* including simdjson/generic/ondemand/field-inl.h for fallback: #include "simdjson/generic/ondemand/field-inl.h" */ +/* begin file simdjson/generic/ondemand/field-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::document_reference value, error_code error) - noexcept : implementation_simdjson_result_base(std::forward(value), error) {} +namespace fallback { +namespace ondemand { +// clang 6 does not think the default constructor can be noexcept, so we make it explicit +simdjson_inline field::field() noexcept : std::pair() {} -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { - if (error()) { return error(); } - return first.count_elements(); -} -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { - if (error()) { return error(); } - return first.count_fields(); -} -simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { - if (error()) { return error(); } - return first.at(index); +simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept + : std::pair(key, std::forward(value)) +{ } -simdjson_inline error_code simdjson_result::rewind() noexcept { - if (error()) { return error(); } - first.rewind(); - return SUCCESS; + +simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { + raw_json_string key; + SIMDJSON_TRY( parent_iter.field_key().get(key) ); + SIMDJSON_TRY( parent_iter.field_value() ); + return field::start(parent_iter, key); } -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { - if (error()) { return error(); } - return first.begin(); + +simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { + return field(key, parent_iter.child()); } -simdjson_inline simdjson_result simdjson_result::end() & noexcept { - return {}; + +simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); + first.consume(); + return answer; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); + +template +simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { + std::string_view key; + SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); + receiver = key; + return SUCCESS; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); + +simdjson_inline raw_json_string field::key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return first; } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { - if (error()) { return error(); } - return first[key]; + + +simdjson_inline std::string_view field::key_raw_json_token() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return std::string_view(reinterpret_cast(first.buf-1), second.iter._json_iter->token.peek(-1) - first.buf + 1); } -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { - if (error()) { return error(); } - return first[key]; + +simdjson_inline std::string_view field::escaped_key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + auto end_quote = second.iter._json_iter->token.peek(-1); + while(*end_quote != '"') end_quote--; + return std::string_view(reinterpret_cast(first.buf), end_quote - first.buf); } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); + +simdjson_inline value &field::value() & noexcept { + return second; } -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); + +simdjson_inline value field::value() && noexcept { + return std::forward(*this).second; } -simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { - if (error()) { return error(); } - return first.get_array(); + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + fallback::ondemand::field &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ } -simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { - if (error()) { return error(); } - return first.get_object(); +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ } -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + +simdjson_inline simdjson_result simdjson_result::key() noexcept { if (error()) { return error(); } - return first.get_uint64(); + return first.key(); } -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + +simdjson_inline simdjson_result simdjson_result::key_raw_json_token() noexcept { if (error()) { return error(); } - return first.get_uint64_in_string(); + return first.key_raw_json_token(); } -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + +simdjson_inline simdjson_result simdjson_result::escaped_key() noexcept { if (error()) { return error(); } - return first.get_int64(); + return first.escaped_key(); } -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + +simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { if (error()) { return error(); } - return first.get_int64_in_string(); + return first.unescaped_key(allow_replacement); } -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { if (error()) { return error(); } - return first.get_double(); + return first.unescaped_key(receiver, allow_replacement); } -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + +simdjson_inline simdjson_result simdjson_result::value() noexcept { if (error()) { return error(); } - return first.get_double_in_string(); + return std::move(first.value()); } -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.get_string(allow_replacement); + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +/* end file simdjson/generic/ondemand/field-inl.h for fallback */ +/* including simdjson/generic/ondemand/json_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/json_iterator-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept + : token(std::forward(other.token)), + parser{other.parser}, + _string_buf_loc{other._string_buf_loc}, + error{other.error}, + _depth{other._depth}, + _root{other._root}, + _streaming{other._streaming} +{ + other.parser = nullptr; } -template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.get_string(receiver, allow_replacement); +simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { + token = other.token; + parser = other.parser; + _string_buf_loc = other._string_buf_loc; + error = other.error; + _depth = other._depth; + _root = other._root; + _streaming = other._streaming; + other.parser = nullptr; + return *this; } -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { - if (error()) { return error(); } - return first.get_wobbly_string(); + +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{false} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif } -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { - if (error()) { return error(); } - return first.get_raw_json_string(); + +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{streaming} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif } -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { - if (error()) { return error(); } - return first.get_bool(); +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + +inline void json_iterator::rewind() noexcept { + token.set_position( root_position() ); + logger::log_headers(); // We start again + _string_buf_loc = parser->string_buf.get(); + _depth = 1; } -simdjson_inline simdjson_result simdjson_result::get_value() noexcept { - if (error()) { return error(); } - return first.get_value(); + +inline bool json_iterator::balanced() const noexcept { + token_iterator ti(token); + int32_t count{0}; + ti.set_position( root_position() ); + while(ti.peek() <= peek_last()) { + switch (*ti.return_current_and_advance()) + { + case '[': case '{': + count++; + break; + case ']': case '}': + count--; + break; + default: + break; + } + } + return count == 0; } -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { - if (error()) { return error(); } - return first.is_null(); + + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and parent_depth, which is a desired effect. The warning does not show up if the +// skip_child() function is not marked inline). +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { + if (depth() <= parent_depth) { return SUCCESS; } + switch (*return_current_and_advance()) { + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + + // For the first open array/object in a value, we've already incremented depth, so keep it the same + // We never stop at colon, but if we did, it wouldn't affect depth + case '[': case '{': case ':': + logger::log_start_value(*this, "skip"); + break; + // If there is a comma, we have just finished a value in an array/object, and need to get back in + case ',': + logger::log_value(*this, "skip"); + break; + // ] or } means we just finished a value and need to jump out of the array/object + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } +#if SIMDJSON_CHECK_EOF + // If there are no more tokens, the parent is incomplete. + if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + break; + case '"': + if(*peek() == ':') { + // We are at a key!!! + // This might happen if you just started an object and you skip it immediately. + // Performance note: it would be nice to get rid of this check as it is somewhat + // expensive. + // https://github.com/simdjson/simdjson/issues/1742 + logger::log_value(*this, "key"); + return_current_and_advance(); // eat up the ':' + break; // important!!! + } + simdjson_fallthrough; + // Anything else must be a scalar value + default: + // For the first scalar, we will have incremented depth already, so we decrement it here. + logger::log_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + } + + // Now that we've considered the first value, we only increment/decrement for arrays/objects + while (position() < end_position()) { + switch (*return_current_and_advance()) { + case '[': case '{': + logger::log_start_value(*this, "skip"); + _depth++; + break; + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + default: + logger::log_value(*this, "skip", ""); + break; + } + } + + return report_error(TAPE_ERROR, "not enough close braces"); } -template -simdjson_inline simdjson_result simdjson_result::get() & noexcept { - if (error()) { return error(); } - return first.get(); + +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool json_iterator::at_root() const noexcept { + return position() == root_position(); } -template -simdjson_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first).get(); + +simdjson_inline bool json_iterator::is_single_token() const noexcept { + return parser->implementation->n_structural_indexes == 1; } -template -simdjson_inline error_code simdjson_result::get(T &out) & noexcept { - if (error()) { return error(); } - return first.get(out); + +simdjson_inline bool json_iterator::streaming() const noexcept { + return _streaming; } -template -simdjson_inline error_code simdjson_result::get(T &out) && noexcept { - if (error()) { return error(); } - return std::forward(first).get(out); + +simdjson_inline token_position json_iterator::root_position() const noexcept { + return _root; } -simdjson_inline simdjson_result simdjson_result::type() noexcept { - if (error()) { return error(); } - return first.type(); + +simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); } -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { - if (error()) { return error(); } - return first.is_scalar(); + +simdjson_inline void json_iterator::assert_at_root() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument + // has side effects that will be discarded. + SIMDJSON_ASSUME( token.position() == _root ); +#endif } -simdjson_inline simdjson_result simdjson_result::is_string() noexcept { - if (error()) { return error(); } - return first.is_string(); + +simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { + assert_valid_position(token._position + required_tokens - 1); } -template <> -simdjson_inline error_code simdjson_result::get(fallback::ondemand::document_reference &out) & noexcept { - if (error()) { return error(); } - out = first; - return SUCCESS; + +simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); + SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#else + (void)position; // Suppress unused parameter warning +#endif } -template <> -simdjson_inline error_code simdjson_result::get(fallback::ondemand::document_reference &out) && noexcept { - if (error()) { return error(); } - out = first; - return SUCCESS; + +simdjson_inline bool json_iterator::at_end() const noexcept { + return position() == end_position(); } -simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { - if (error()) { return error(); } - return first.is_negative(); +simdjson_inline token_position json_iterator::end_position() const noexcept { + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + return &parser->implementation->structural_indexes[n_structural_indexes]; } -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { - if (error()) { return error(); } - return first.is_integer(); + +inline std::string json_iterator::to_string() const noexcept { + if( !is_alive() ) { return "dead json_iterator instance"; } + const char * current_structural = reinterpret_cast(token.peek()); + return std::string("json_iterator [ depth : ") + std::to_string(_depth) + + std::string(", structural : '") + std::string(current_structural,1) + + std::string("', offset : ") + std::to_string(token.current_offset()) + + std::string("', error : ") + error_message(error) + + std::string(" ]"); } -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { - if (error()) { return error(); } - return first.get_number_type(); + +inline simdjson_result json_iterator::current_location() const noexcept { + if (!is_alive()) { // Unrecoverable error + if (!at_root()) { + return reinterpret_cast(token.peek(-1)); + } else { + return reinterpret_cast(token.peek()); + } + } + if (at_end()) { + return OUT_OF_BOUNDS; + } + return reinterpret_cast(token.peek()); } -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { - if (error()) { return error(); } - return first.get_number(); + +simdjson_inline bool json_iterator::is_alive() const noexcept { + return parser; } -#if SIMDJSON_EXCEPTIONS -template -simdjson_inline simdjson_result::operator T() noexcept(false) { - static_assert(std::is_same::value == false, "You should not call get when T is a document"); - static_assert(std::is_same::value == false, "You should not call get when T is a document"); - if (error()) { throw simdjson_error(error()); } - return first.get(); + +simdjson_inline void json_iterator::abandon() noexcept { + parser = nullptr; + _depth = 0; } -simdjson_inline simdjson_result::operator fallback::ondemand::array() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif // SIMDJSON_CHECK_EOF + return token.return_current_and_advance(); } -simdjson_inline simdjson_result::operator fallback::ondemand::object() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { + // deliberately done without safety guard: + return token.peek(); } -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // SIMDJSON_CHECK_EOF + return token.peek(delta); } -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // #if SIMDJSON_CHECK_EOF + return token.peek_length(delta); } -simdjson_inline simdjson_result::operator double() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { + // todo: currently we require end-of-string buffering, but the following + // assert_valid_position should be turned on if/when we lift that condition. + // assert_valid_position(position); + // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF + // is ON by default, we have no choice but to disable it for real with a comment. + return token.peek(position); } -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_length(position); } -simdjson_inline simdjson_result::operator fallback::ondemand::raw_json_string() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_root_length(position); } -simdjson_inline simdjson_result::operator bool() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline token_position json_iterator::last_position() const noexcept { + // The following line fails under some compilers... + // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); + // since it has side-effects. + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + SIMDJSON_ASSUME(n_structural_indexes > 0); + return &parser->implementation->structural_indexes[n_structural_indexes - 1]; } -simdjson_inline simdjson_result::operator fallback::ondemand::value() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { + return token.peek(last_position()); } -#endif -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { - if (error()) { return error(); } - return first.current_location(); +simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { + SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); + SIMDJSON_ASSUME(_depth == parent_depth + 1); + _depth = parent_depth; } -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { - if (error()) { return error(); } - return first.raw_json_token(); +simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); + _depth = child_depth; } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); +simdjson_inline depth_t json_iterator::depth() const noexcept { + return _depth; } -simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { - if (error()) { - return error(); - } - return first.at_path(json_path); +simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { + return _string_buf_loc; } -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H -/* end file simdjson/generic/ondemand/document-inl.h for fallback */ -/* including simdjson/generic/ondemand/document_stream-inl.h for fallback: #include "simdjson/generic/ondemand/document_stream-inl.h" */ -/* begin file simdjson/generic/ondemand/document_stream-inl.h for fallback */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -#include -#include +simdjson_warn_unused simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); + logger::log_error(*this, message); + error = _error; + return error; +} -namespace simdjson { -namespace fallback { -namespace ondemand { +simdjson_inline token_position json_iterator::position() const noexcept { + return token.position(); +} -#ifdef SIMDJSON_THREADS_ENABLED +simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape(in, _string_buf_loc, allow_replacement); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else + return parser->unescape(in, _string_buf_loc, allow_replacement); +#endif +} -inline void stage1_worker::finish() { - // After calling "run" someone would call finish() to wait - // for the end of the processing. - // This function will wait until either the thread has done - // the processing or, else, the destructor has been called. - std::unique_lock lock(locking_mutex); - cond_var.wait(lock, [this]{return has_work == false;}); +simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape_wobbly(in, _string_buf_loc); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else + return parser->unescape_wobbly(in, _string_buf_loc); +#endif } -inline stage1_worker::~stage1_worker() { - // The thread may never outlive the stage1_worker instance - // and will always be stopped/joined before the stage1_worker - // instance is gone. - stop_thread(); +simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); +#if SIMDJSON_DEVELOPMENT_CHECKS +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); + SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); +#endif +#endif + token.set_position(position); + _depth = child_depth; } -inline void stage1_worker::start_thread() { - std::unique_lock lock(locking_mutex); - if(thread.joinable()) { - return; // This should never happen but we never want to create more than one thread. +simdjson_warn_unused simdjson_inline error_code json_iterator::consume_character(char c) noexcept { + if (*peek() == c) { + return_current_and_advance(); + return SUCCESS; } - thread = std::thread([this]{ - while(true) { - std::unique_lock thread_lock(locking_mutex); - // We wait for either "run" or "stop_thread" to be called. - cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); - // If, for some reason, the stop_thread() method was called (i.e., the - // destructor of stage1_worker is called, then we want to immediately destroy - // the thread (and not do any more processing). - if(!can_work) { - break; - } - this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, - this->_next_batch_start); - this->has_work = false; - // The condition variable call should be moved after thread_lock.unlock() for performance - // reasons but thread sanitizers may report it as a data race if we do. - // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock - cond_var.notify_one(); // will notify "finish" - thread_lock.unlock(); - } - } - ); + return TAPE_ERROR; } +#if SIMDJSON_DEVELOPMENT_CHECKS -inline void stage1_worker::stop_thread() { - std::unique_lock lock(locking_mutex); - // We have to make sure that all locks can be released. - can_work = false; - has_work = false; - cond_var.notify_all(); - lock.unlock(); - if(thread.joinable()) { - thread.join(); - } +simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; } -inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { - std::unique_lock lock(locking_mutex); - owner = ds; - _next_batch_start = next_batch_start; - stage1_thread_parser = stage1; - has_work = true; - // The condition variable call should be moved after thread_lock.unlock() for performance - // reasons but thread sanitizers may report it as a data race if we do. - // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock - cond_var.notify_one(); // will notify the thread lock that we have work - lock.unlock(); +simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } } -#endif // SIMDJSON_THREADS_ENABLED - -simdjson_inline document_stream::document_stream( - ondemand::parser &_parser, - const uint8_t *_buf, - size_t _len, - size_t _batch_size, - bool _allow_comma_separated -) noexcept - : parser{&_parser}, - buf{_buf}, - len{_len}, - batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? MINIMAL_BATCH_SIZE : _batch_size}, - allow_comma_separated{_allow_comma_separated}, - error{SUCCESS} - #ifdef SIMDJSON_THREADS_ENABLED - , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change - #endif -{ -#ifdef SIMDJSON_THREADS_ENABLED - if(worker.get() == nullptr) { - error = MEMALLOC; - } #endif -} -simdjson_inline document_stream::document_stream() noexcept - : parser{nullptr}, - buf{nullptr}, - len{0}, - batch_size{0}, - allow_comma_separated{false}, - error{UNINITIALIZED} - #ifdef SIMDJSON_THREADS_ENABLED - , use_thread(false) - #endif -{ -} -simdjson_inline document_stream::~document_stream() noexcept -{ - #ifdef SIMDJSON_THREADS_ENABLED - worker.reset(); - #endif +simdjson_warn_unused simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); + logger::log_error(*this, message); + return _error; } -inline size_t document_stream::size_in_bytes() const noexcept { - return len; -} -inline size_t document_stream::truncated_bytes() const noexcept { - if(error == CAPACITY) { return len - batch_start; } - return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; +simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { + // This function is not expected to be called in performance-sensitive settings. + // Let us guard against silly cases: + if((N < max_len) || (N == 0)) { return false; } + // Copy to the buffer. + std::memcpy(tmpbuf, json, max_len); + if(N > max_len) { // We pad whatever remains with ' '. + std::memset(tmpbuf + max_len, ' ', N - max_len); + } + return true; } -simdjson_inline document_stream::iterator::iterator() noexcept - : stream{nullptr}, finished{true} { -} +} // namespace ondemand +} // namespace fallback +} // namespace simdjson -simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept - : stream{_stream}, finished{is_end} { -} +namespace simdjson { -simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { - return simdjson_result(stream->doc, stream->error); -} +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::json_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} -simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { - // If there is an error, then we want the iterator - // to be finished, no matter what. (E.g., we do not - // keep generating documents with errors, or go beyond - // a document with errors.) - // - // Users do not have to call "operator*()" when they use operator++, - // so we need to end the stream in the operator++ function. - // - // Note that setting finished = true is essential otherwise - // we would enter an infinite loop. - if (stream->error) { finished = true; } - // Note that stream->error() is guarded against error conditions - // (it will immediately return if stream->error casts to false). - // In effect, this next function does nothing when (stream->error) - // is true (hence the risk of an infinite loop). - stream->next(); - // If that was the last document, we're finished. - // It is the only type of error we do not want to appear - // in operator*. - if (stream->error == EMPTY) { finished = true; } - // If we had any other kind of error (not EMPTY) then we want - // to pass it along to the operator* and we cannot mark the result - // as "finished" just yet. - return *this; -} +} // namespace simdjson -simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { - return finished != other.finished; -} +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/json_iterator-inl.h for fallback */ +/* including simdjson/generic/ondemand/json_type-inl.h for fallback: #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* begin file simdjson/generic/ondemand/json_type-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H -simdjson_inline document_stream::iterator document_stream::begin() noexcept { - start(); - // If there are no documents, we're finished. - return iterator(this, error == EMPTY); -} +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -simdjson_inline document_stream::iterator document_stream::end() noexcept { - return iterator(this, true); -} +namespace simdjson { +namespace fallback { +namespace ondemand { -inline void document_stream::start() noexcept { - if (error) { return; } - error = parser->allocate(batch_size); - if (error) { return; } - // Always run the first stage 1 parse immediately - batch_start = 0; - error = run_stage1(*parser, batch_start); - while(error == EMPTY) { - // In exceptional cases, we may start with an empty block - batch_start = next_batch_start(); - if (batch_start >= len) { return; } - error = run_stage1(*parser, batch_start); - } - if (error) { return; } - doc_index = batch_start; - doc = document(json_iterator(&buf[batch_start], parser)); - doc.iter._streaming = true; +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { + switch (type) { + case json_type::array: out << "array"; break; + case json_type::object: out << "object"; break; + case json_type::number: out << "number"; break; + case json_type::string: out << "string"; break; + case json_type::boolean: out << "boolean"; break; + case json_type::null: out << "null"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} - #ifdef SIMDJSON_THREADS_ENABLED - if (use_thread && next_batch_start() < len) { - // Kick off the first thread on next batch if needed - error = stage1_thread_parser.allocate(batch_size); - if (error) { return; } - worker->start_thread(); - start_stage1_thread(); - if (error) { return; } - } - #endif // SIMDJSON_THREADS_ENABLED +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { + return out << type.value(); } +#endif -inline void document_stream::next() noexcept { - // We always enter at once once in an error condition. - if (error) { return; } - next_document(); - if (error) { return; } - auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); - doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; - // Check if at end of structural indexes (i.e. at end of batch) - if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { - error = EMPTY; - // Load another batch (if available) - while (error == EMPTY) { - batch_start = next_batch_start(); - if (batch_start >= len) { break; } - #ifdef SIMDJSON_THREADS_ENABLED - if(use_thread) { - load_from_stage1_thread(); - } else { - error = run_stage1(*parser, batch_start); - } - #else - error = run_stage1(*parser, batch_start); - #endif - /** - * Whenever we move to another window, we need to update all pointers to make - * it appear as if the input buffer started at the beginning of the window. - * - * Take this input: - * - * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] - * - * Say you process the following window... - * - * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' - * - * When you do so, the json_iterator has a pointer at the beginning of the memory region - * (pointing at the beginning of '{"z"...'. - * - * When you move to the window that starts at... - * - * '[7, 10, 9] [15, 11, 12, 13] ... - * - * then it is not sufficient to just run stage 1. You also need to re-anchor the - * json_iterator so that it believes we are starting at '[7, 10, 9]...'. - * - * Under the DOM front-end, this gets done automatically because the parser owns - * the pointer the data, and when you call stage1 and then stage2 on the same - * parser, then stage2 will run on the pointer acquired by stage1. - * - * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that - * we used. But json_iterator has no callback when stage1 is called on the parser. - * In fact, I think that the parser is unaware of json_iterator. - * - * - * So we need to re-anchor the json_iterator after each call to stage 1 so that - * all of the pointers are in sync. - */ - doc.iter = json_iterator(&buf[batch_start], parser); - doc.iter._streaming = true; - /** - * End of resync. - */ - if (error) { continue; } // If the error was EMPTY, we may want to load another batch. - doc_index = batch_start; - } - } +simdjson_inline number_type number::get_number_type() const noexcept { + return type; } -inline void document_stream::next_document() noexcept { - // Go to next place where depth=0 (document depth) - error = doc.iter.skip_child(0); - if (error) { return; } - // Always set depth=1 at the start of document - doc.iter._depth = 1; - // consume comma if comma separated is allowed - if (allow_comma_separated) { doc.iter.consume_character(','); } - // Resets the string buffer at the beginning, thus invalidating the strings. - doc.iter._string_buf_loc = parser->string_buf.get(); - doc.iter._root = doc.iter.position(); +simdjson_inline bool number::is_uint64() const noexcept { + return get_number_type() == number_type::unsigned_integer; } -inline size_t document_stream::next_batch_start() const noexcept { - return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; +simdjson_inline uint64_t number::get_uint64() const noexcept { + return payload.unsigned_integer; } -inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { - // This code only updates the structural index in the parser, it does not update any json_iterator - // instance. - size_t remaining = len - _batch_start; - if (remaining <= batch_size) { - return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); - } else { - return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); - } +simdjson_inline number::operator uint64_t() const noexcept { + return get_uint64(); } -simdjson_inline size_t document_stream::iterator::current_index() const noexcept { - return stream->doc_index; +simdjson_inline bool number::is_int64() const noexcept { + return get_number_type() == number_type::signed_integer; } -simdjson_inline std::string_view document_stream::iterator::source() const noexcept { - auto depth = stream->doc.iter.depth(); - auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); +simdjson_inline int64_t number::get_int64() const noexcept { + return payload.signed_integer; +} - // If at root, process the first token to determine if scalar value - if (stream->doc.iter.at_root()) { - switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { - case '{': case '[': // Depth=1 already at start of document - break; - case '}': case ']': - depth--; - break; - default: // Scalar value document - // TODO: We could remove trailing whitespaces - // This returns a string spanning from start of value to the beginning of the next document (excluded) - { - auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; - // normally the length would be next_index - current_index() - 1, except for the last document - size_t svlen = next_index - current_index(); - const char *start = reinterpret_cast(stream->buf) + current_index(); - while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { - svlen--; - } - return std::string_view(start, svlen); - } - } - cur_struct_index++; - } +simdjson_inline number::operator int64_t() const noexcept { + return get_int64(); +} - while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { - switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { - case '{': case '[': - depth++; - break; - case '}': case ']': - depth--; - break; - } - if (depth == 0) { break; } - cur_struct_index++; - } +simdjson_inline bool number::is_double() const noexcept { + return get_number_type() == number_type::floating_point_number; +} - return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; +simdjson_inline double number::get_double() const noexcept { + return payload.floating_point_number; } -inline error_code document_stream::iterator::error() const noexcept { - return stream->error; +simdjson_inline number::operator double() const noexcept { + return get_double(); } -#ifdef SIMDJSON_THREADS_ENABLED +simdjson_inline double number::as_double() const noexcept { + if(is_double()) { + return payload.floating_point_number; + } + if(is_int64()) { + return double(payload.signed_integer); + } + return double(payload.unsigned_integer); +} -inline void document_stream::load_from_stage1_thread() noexcept { - worker->finish(); - // Swap to the parser that was loaded up in the thread. Make sure the parser has - // enough memory to swap to, as well. - std::swap(stage1_thread_parser,*parser); - error = stage1_thread_error; - if (error) { return; } +simdjson_inline void number::append_s64(int64_t value) noexcept { + payload.signed_integer = value; + type = number_type::signed_integer; +} - // If there's anything left, start the stage 1 thread! - if (next_batch_start() < len) { - start_stage1_thread(); - } +simdjson_inline void number::append_u64(uint64_t value) noexcept { + payload.unsigned_integer = value; + type = number_type::unsigned_integer; } -inline void document_stream::start_stage1_thread() noexcept { - // we call the thread on a lambda that will update - // this->stage1_thread_error - // there is only one thread that may write to this value - // TODO this is NOT exception-safe. - this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error - size_t _next_batch_start = this->next_batch_start(); - - worker->run(this, & this->stage1_thread_parser, _next_batch_start); +simdjson_inline void number::append_double(double value) noexcept { + payload.floating_point_number = value; + type = number_type::floating_point_number; } -#endif // SIMDJSON_THREADS_ENABLED +simdjson_inline void number::skip_double() noexcept { + type = number_type::floating_point_number; +} } // namespace ondemand } // namespace fallback @@ -50516,588 +58243,719 @@ inline void document_stream::start_stage1_thread() noexcept { namespace simdjson { -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base(error) -{ -} -simdjson_inline simdjson_result::simdjson_result( - fallback::ondemand::document_stream &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ -} +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::json_type &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} -} +} // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H -/* end file simdjson/generic/ondemand/document_stream-inl.h for fallback */ -/* including simdjson/generic/ondemand/field-inl.h for fallback: #include "simdjson/generic/ondemand/field-inl.h" */ -/* begin file simdjson/generic/ondemand/field-inl.h for fallback */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +/* end file simdjson/generic/ondemand/json_type-inl.h for fallback */ +/* including simdjson/generic/ondemand/logger-inl.h for fallback: #include "simdjson/generic/ondemand/logger-inl.h" */ +/* begin file simdjson/generic/ondemand/logger-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#include +#include + namespace simdjson { namespace fallback { namespace ondemand { +namespace logger { -// clang 6 does not think the default constructor can be noexcept, so we make it explicit -simdjson_inline field::field() noexcept : std::pair() {} +static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; +static constexpr const int LOG_EVENT_LEN = 20; +static constexpr const int LOG_BUFFER_LEN = 30; +static constexpr const int LOG_SMALL_BUFFER_LEN = 10; +static int log_depth = 0; // Not threadsafe. Log only. -simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept - : std::pair(key, std::forward(value)) -{ +// Helper to turn unprintable or newline characters into spaces +static inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } } -simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { - raw_json_string key; - SIMDJSON_TRY( parent_iter.field_key().get(key) ); - SIMDJSON_TRY( parent_iter.field_value() ); - return field::start(parent_iter, key); +template +static inline std::string string_format(const std::string& format, const Args&... args) +{ + SIMDJSON_PUSH_DISABLE_ALL_WARNINGS + int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; + auto size = static_cast(size_s); + if (size <= 0) return std::string(); + std::unique_ptr buf(new char[size]); + std::snprintf(buf.get(), size, format.c_str(), args...); + SIMDJSON_POP_DISABLE_WARNINGS + return std::string(buf.get(), buf.get() + size - 1); } -simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { - return field(key, parent_iter.child()); +static inline log_level get_log_level_from_env() +{ + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + char *lvl = getenv("SIMDJSON_LOG_LEVEL"); + SIMDJSON_POP_DISABLE_WARNINGS + if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } + return log_level::info; } -simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. - simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); - first.consume(); - return answer; +static inline log_level log_threshold() +{ + static log_level threshold = get_log_level_from_env(); + return threshold; } -template -simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { - std::string_view key; - SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); - receiver = key; - return SUCCESS; +static inline bool should_log(log_level level) +{ + return level >= log_threshold(); } -simdjson_inline raw_json_string field::key() const noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. - return first; +inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); } - -simdjson_inline std::string_view field::key_raw_json_token() const noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. - return std::string_view(reinterpret_cast(first.buf-1), second.iter._json_iter->token.peek(-1) - first.buf + 1); +inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "", type, detail, log_level::info); } - -simdjson_inline std::string_view field::escaped_key() const noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. - auto end_quote = second.iter._json_iter->token.peek(-1); - while(*end_quote != '"') end_quote--; - return std::string_view(reinterpret_cast(first.buf), end_quote - first.buf); +inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); } -simdjson_inline value &field::value() & noexcept { - return second; +inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "+", type, detail, log_level::info); + if (LOG_ENABLED) { log_depth++; } } - -simdjson_inline value field::value() && noexcept { - return std::forward(*this).second; +inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_line(iter, "+", type, "", delta, depth_delta, log_level::info); + if (LOG_ENABLED) { log_depth++; } } -} // namespace ondemand -} // namespace fallback -} // namespace simdjson +inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + if (LOG_ENABLED) { log_depth--; } + log_line(iter, "-", type, "", delta, depth_delta, log_level::info); +} -namespace simdjson { +inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); +} +inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { + log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); +} -simdjson_inline simdjson_result::simdjson_result( - fallback::ondemand::field &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ +inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_event(iter.json_iter(), type, detail, delta, depth_delta); } -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base(error) -{ + +inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_value(iter.json_iter(), type, detail, delta, depth_delta); } -simdjson_inline simdjson_result simdjson_result::key() noexcept { - if (error()) { return error(); } - return first.key(); +inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_start_value(iter.json_iter(), type, delta, depth_delta); } -simdjson_inline simdjson_result simdjson_result::key_raw_json_token() noexcept { - if (error()) { return error(); } - return first.key_raw_json_token(); +inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_end_value(iter.json_iter(), type, delta, depth_delta); } -simdjson_inline simdjson_result simdjson_result::escaped_key() noexcept { - if (error()) { return error(); } - return first.escaped_key(); +inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_error(iter.json_iter(), error, detail, delta, depth_delta); } -simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.unescaped_key(allow_replacement); +inline void log_headers() noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(log_level::info))) { + // Technically a static variable is not thread-safe, but if you are using threads and logging... well... + static bool displayed_hint{false}; + log_depth = 0; + printf("\n"); + if (!displayed_hint) { + // We only print this helpful header once. + printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); + printf("# +array says 'this is where we were when we discovered the start array'\n"); + printf( + "# -array says 'this is where we were when we ended the array'\n"); + printf("# skip says 'this is a structural or value I am skipping'\n"); + printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); + printf("#\n"); + printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); + printf("# in addition to the depth being displayed.\n"); + printf("#\n"); + printf("# Every token in the document has a single depth determined by the tokens before it,\n"); + printf("# and is not affected by what the token actually is.\n"); + printf("#\n"); + printf("# Not all structural elements are presented as tokens in the logs.\n"); + printf("#\n"); + printf("# We never give control to the user within an empty array or an empty object.\n"); + printf("#\n"); + printf("# Inside an array, having a depth greater than the array's depth means that\n"); + printf("# we are pointing inside a value.\n"); + printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); + printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); + displayed_hint = true; + } + printf("\n"); + printf("| %-*s ", LOG_EVENT_LEN, "Event"); + printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); + printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); + // printf("| %-*s ", 5, "Next#"); + printf("| %-*s ", 5, "Depth"); + printf("| Detail "); + printf("|\n"); + + printf("|%.*s", LOG_EVENT_LEN + 2, DASHES); + printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); + printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); + // printf("|%.*s", 5+2, DASHES); + printf("|%.*s", 5 + 2, DASHES); + printf("|--------"); + printf("|\n"); + fflush(stdout); + } + } } -template -simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.unescaped_key(receiver, allow_replacement); +template +inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { + log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); } -simdjson_inline simdjson_result simdjson_result::value() noexcept { - if (error()) { return error(); } - return std::move(first.value()); +template +inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(level))) { + const int indent = depth * 2; + const auto buf = iter.token.buf; + auto msg = string_format(title, std::forward(args)...); + printf("| %*s%s%-*s ", indent, "", title_prefix, + LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); + { + // Print the current structural. + printf("| "); + // Before we begin, the index might point right before the document. + // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 + if (index < iter._root) { + printf("%*s", LOG_BUFFER_LEN, ""); + } else { + auto current_structural = &buf[*index]; + for (int i = 0; i < LOG_BUFFER_LEN; i++) { + printf("%c", printable_char(current_structural[i])); + } + } + printf(" "); + } + { + // Print the next structural. + printf("| "); + auto next_structural = &buf[*(index + 1)]; + for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { + printf("%c", printable_char(next_structural[i])); + } + printf(" "); + } + // printf("| %5u ", *(index+1)); + printf("| %5i ", depth); + printf("| %6.*s ", int(detail.size()), detail.data()); + printf("|\n"); + fflush(stdout); + } + } } +} // namespace logger +} // namespace ondemand +} // namespace fallback } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H -/* end file simdjson/generic/ondemand/field-inl.h for fallback */ -/* including simdjson/generic/ondemand/json_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/json_iterator-inl.h for fallback */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +/* end file simdjson/generic/ondemand/logger-inl.h for fallback */ +/* including simdjson/generic/ondemand/object-inl.h for fallback: #include "simdjson/generic/ondemand/object-inl.h" */ +/* begin file simdjson/generic/ondemand/object-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/jsonpathutil.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_string_builder.h" // for constevalutil::fixed_string */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { namespace ondemand { -simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept - : token(std::forward(other.token)), - parser{other.parser}, - _string_buf_loc{other._string_buf_loc}, - error{other.error}, - _depth{other._depth}, - _root{other._root}, - _streaming{other._streaming} -{ - other.parser = nullptr; -} -simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { - token = other.token; - parser = other.parser; - _string_buf_loc = other._string_buf_loc; - error = other.error; - _depth = other._depth; - _root = other._root; - _streaming = other._streaming; - other.parser = nullptr; - return *this; +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); } - -simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept - : token(buf, &_parser->implementation->structural_indexes[0]), - parser{_parser}, - _string_buf_loc{parser->string_buf.get()}, - _depth{1}, - _root{parser->implementation->structural_indexes.get()}, - _streaming{false} - -{ - logger::log_headers(); -#if SIMDJSON_CHECK_EOF - assert_more_tokens(); -#endif +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); } - -#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept - : token(buf, &_parser->implementation->structural_indexes[0]), - parser{_parser}, - _string_buf_loc{parser->string_buf.get()}, - _depth{1}, - _root{parser->implementation->structural_indexes.get()}, - _streaming{streaming} - -{ - logger::log_headers(); -#if SIMDJSON_CHECK_EOF - assert_more_tokens(); -#endif +simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { + return find_field_unordered(key); } -#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON - -inline void json_iterator::rewind() noexcept { - token.set_position( root_position() ); - logger::log_headers(); // We start again - _string_buf_loc = parser->string_buf.get(); - _depth = 1; +simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { + return std::forward(*this).find_field_unordered(key); } - -inline bool json_iterator::balanced() const noexcept { - token_iterator ti(token); - int32_t count{0}; - ti.set_position( root_position() ); - while(ti.peek() <= peek_last()) { - switch (*ti.return_current_and_advance()) - { - case '[': case '{': - count++; - break; - case ']': case '}': - count--; - break; - default: - break; - } +simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; } - return count == 0; + return value(iter.child()); } - - -// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller -// relating depth and parent_depth, which is a desired effect. The warning does not show up if the -// skip_child() function is not marked inline). -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { - if (depth() <= parent_depth) { return SUCCESS; } - switch (*return_current_and_advance()) { - // TODO consider whether matching braces is a requirement: if non-matching braces indicates - // *missing* braces, then future lookups are not in the object/arrays they think they are, - // violating the rule "validate enough structure that the user can be confident they are - // looking at the right values." - // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth - - // For the first open array/object in a value, we've already incremented depth, so keep it the same - // We never stop at colon, but if we did, it wouldn't affect depth - case '[': case '{': case ':': - logger::log_start_value(*this, "skip"); - break; - // If there is a comma, we have just finished a value in an array/object, and need to get back in - case ',': - logger::log_value(*this, "skip"); - break; - // ] or } means we just finished a value and need to jump out of the array/object - case ']': case '}': - logger::log_end_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } -#if SIMDJSON_CHECK_EOF - // If there are no more tokens, the parent is incomplete. - if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } -#endif // SIMDJSON_CHECK_EOF - break; - case '"': - if(*peek() == ':') { - // We are at a key!!! - // This might happen if you just started an object and you skip it immediately. - // Performance note: it would be nice to get rid of this check as it is somewhat - // expensive. - // https://github.com/simdjson/simdjson/issues/1742 - logger::log_value(*this, "key"); - return_current_and_advance(); // eat up the ':' - break; // important!!! - } - simdjson_fallthrough; - // Anything else must be a scalar value - default: - // For the first scalar, we will have incremented depth already, so we decrement it here. - logger::log_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } - break; - } - - // Now that we've considered the first value, we only increment/decrement for arrays/objects - while (position() < end_position()) { - switch (*return_current_and_advance()) { - case '[': case '{': - logger::log_start_value(*this, "skip"); - _depth++; - break; - // TODO consider whether matching braces is a requirement: if non-matching braces indicates - // *missing* braces, then future lookups are not in the object/arrays they think they are, - // violating the rule "validate enough structure that the user can be confident they are - // looking at the right values." - // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth - case ']': case '}': - logger::log_end_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } - break; - default: - logger::log_value(*this, "skip", ""); - break; - } +simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; } - - return report_error(TAPE_ERROR, "not enough close braces"); + return value(iter.child()); } -SIMDJSON_POP_DISABLE_WARNINGS - -simdjson_inline bool json_iterator::at_root() const noexcept { - return position() == root_position(); +simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_object().error() ); + return object(iter); } - -simdjson_inline bool json_iterator::is_single_token() const noexcept { - return parser->implementation->n_structural_indexes == 1; +simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_root_object().error() ); + return object(iter); } - -simdjson_inline bool json_iterator::streaming() const noexcept { - return _streaming; +simdjson_warn_unused simdjson_inline error_code object::consume() noexcept { + if(iter.is_at_key()) { + /** + * whenever you are pointing at a key, calling skip_child() is + * unsafe because you will hit a string and you will assume that + * it is string value, and this mistake will lead you to make bad + * depth computation. + */ + /** + * We want to 'consume' the key. We could really + * just do _json_iter->return_current_and_advance(); at this + * point, but, for clarity, we will use the high-level API to + * eat the key. We assume that the compiler optimizes away + * most of the work. + */ + simdjson_unused raw_json_string actual_key; + auto error = iter.field_key().get(actual_key); + if (error) { iter.abandon(); return error; }; + // Let us move to the value while we are at it. + if ((error = iter.field_value())) { iter.abandon(); return error; } + } + auto error_skip = iter.json_iter().skip_child(iter.depth()-1); + if(error_skip) { iter.abandon(); } + return error_skip; } -simdjson_inline token_position json_iterator::root_position() const noexcept { - return _root; +simdjson_inline simdjson_result object::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + const uint8_t * final_point{iter._json_iter->peek()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } -simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { - SIMDJSON_ASSUME( _depth == 1 ); +simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.started_object().error() ); + return object(iter); } -simdjson_inline void json_iterator::assert_at_root() const noexcept { - SIMDJSON_ASSUME( _depth == 1 ); -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument - // has side effects that will be discarded. - SIMDJSON_ASSUME( token.position() == _root ); -#endif +simdjson_inline object object::resume(const value_iterator &iter) noexcept { + return iter; } -simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { - assert_valid_position(token._position + required_tokens - 1); +simdjson_inline object::object(const value_iterator &_iter) noexcept + : iter{_iter} +{ } -simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); - SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +simdjson_inline simdjson_result object::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } #endif + return object_iterator(iter); } - -simdjson_inline bool json_iterator::at_end() const noexcept { - return position() == end_position(); +simdjson_inline simdjson_result object::end() noexcept { + return object_iterator(iter); } -simdjson_inline token_position json_iterator::end_position() const noexcept { - uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; - return &parser->implementation->structural_indexes[n_structural_indexes]; + +inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result child; + + // If there is an escape character in the key, unescape it and then get the child. + size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = find_field(unescaped); // Take note find_field does not unescape keys when matching + } else { + child = find_field(key); + } + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; } -inline std::string json_iterator::to_string() const noexcept { - if( !is_alive() ) { return "dead json_iterator instance"; } - const char * current_structural = reinterpret_cast(token.peek()); - return std::string("json_iterator [ depth : ") + std::to_string(_depth) - + std::string(", structural : '") + std::string(current_structural,1) - + std::string("', offset : ") + std::to_string(token.current_offset()) - + std::string("', error : ") + error_message(error) - + std::string(" ]"); +inline simdjson_result object::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { + return INVALID_JSON_POINTER; + } + return at_pointer(json_pointer); } -inline simdjson_result json_iterator::current_location() const noexcept { - if (!is_alive()) { // Unrecoverable error - if (!at_root()) { - return reinterpret_cast(token.peek(-1)); +inline simdjson_result> object::at_path_with_wildcard(std::string_view json_path) noexcept { + std::vector result; + + auto result_pair = get_next_key_and_json_path(json_path); + std::string_view key = result_pair.first; + std::string_view remaining_path = result_pair.second; + // Handle when its the case for wildcard + if (key == "*") { + // Loop through each field in the object + for (auto field : *this) { + value val; + SIMDJSON_TRY(field.value().get(val)); + + if (remaining_path.empty()) { + result.push_back(std::move(val)); + } else { + auto nested_result = val.at_path_with_wildcard(remaining_path); + + if (nested_result.error()) { + return nested_result.error(); + } + // Extract and append all nested matches to our result + std::vector nested_vec; + SIMDJSON_TRY(std::move(nested_result).get(nested_vec)); + + result.insert(result.end(), + std::make_move_iterator(nested_vec.begin()), + std::make_move_iterator(nested_vec.end())); + } + } + return result; + } else { + value val; + SIMDJSON_TRY(find_field(key).get(val)); + + if (remaining_path.empty()) { + result.push_back(std::move(val)); + return result; } else { - return reinterpret_cast(token.peek()); + return val.at_path_with_wildcard(remaining_path); } } - if (at_end()) { - return OUT_OF_BOUNDS; - } - return reinterpret_cast(token.peek()); } -simdjson_inline bool json_iterator::is_alive() const noexcept { - return parser; +simdjson_inline simdjson_result object::count_fields() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the object after counting the number of elements. + iter.reset_object(); + return count; } -simdjson_inline void json_iterator::abandon() noexcept { - parser = nullptr; - _depth = 0; +simdjson_inline simdjson_result object::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_object().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; } -simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { -#if SIMDJSON_CHECK_EOF - assert_more_tokens(); -#endif // SIMDJSON_CHECK_EOF - return token.return_current_and_advance(); +simdjson_inline simdjson_result object::reset() & noexcept { + return iter.reset_object(); } -simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { - // deliberately done without safety guard: - return token.peek(); -} +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION -simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_more_tokens(delta+1); -#endif // SIMDJSON_CHECK_EOF - return token.peek(delta); -} +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code object::extract_into(T& out) & noexcept { + // Helper to check if a field name matches any of the requested fields + auto should_extract = [](std::string_view field_name) constexpr -> bool { + return ((FieldNames.view() == field_name) || ...); + }; -simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_more_tokens(delta+1); -#endif // #if SIMDJSON_CHECK_EOF - return token.peek_length(delta); -} + // Iterate through all members of T using reflection + template for (constexpr auto mem : std::define_static_array( + std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + + // Only extract this field if it's in our list of requested fields + if constexpr (should_extract(key)) { + // Try to find and extract the field + if constexpr (concepts::optional_type) { + // For optional fields, it's ok if they're missing + auto field_result = find_field_unordered(key); + if (!field_result.error()) { + auto error = field_result.get(out.[:mem:]); + if (error && error != NO_SUCH_FIELD) { + return error; + } + } else if (field_result.error() != NO_SUCH_FIELD) { + return field_result.error(); + } else { + out.[:mem:].reset(); + } + } else { + // For required fields (in the requested list), fail if missing + SIMDJSON_TRY((*this)[key].get(out.[:mem:])); + } + } + } + }; -simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { - // todo: currently we require end-of-string buffering, but the following - // assert_valid_position should be turned on if/when we lift that condition. - // assert_valid_position(position); - // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF - // is ON by default, we have no choice but to disable it for real with a comment. - return token.peek(position); + return SUCCESS; } -simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_valid_position(position); -#endif // SIMDJSON_CHECK_EOF - return token.peek_length(position); +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); } -simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_valid_position(position); -#endif // SIMDJSON_CHECK_EOF - return token.peek_root_length(position); +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); } - -simdjson_inline token_position json_iterator::last_position() const noexcept { - // The following line fails under some compilers... - // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); - // since it has side-effects. - uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; - SIMDJSON_ASSUME(n_structural_indexes > 0); - return &parser->implementation->structural_indexes[n_structural_indexes - 1]; +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); } -simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { - return token.peek(last_position()); +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field_unordered(key); } - -simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { - SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); - SIMDJSON_ASSUME(_depth == parent_depth + 1); - _depth = parent_depth; +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; } - -simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { - SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); - SIMDJSON_ASSUME(_depth == child_depth - 1); - _depth = child_depth; +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first)[key]; } - -simdjson_inline depth_t json_iterator::depth() const noexcept { - return _depth; +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field(key); } -simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { - return _string_buf_loc; +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); } -simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { - SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); - logger::log_error(*this, message); - error = _error; - return error; +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); } -simdjson_inline token_position json_iterator::position() const noexcept { - return token.position(); +simdjson_inline simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path_with_wildcard(json_path); } -simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { -#if SIMDJSON_DEVELOPMENT_CHECKS - auto result = parser->unescape(in, _string_buf_loc, allow_replacement); - SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); - return result; -#else - return parser->unescape(in, _string_buf_loc, allow_replacement); -#endif +inline simdjson_result simdjson_result::reset() noexcept { + if (error()) { return error(); } + return first.reset(); } -simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { -#if SIMDJSON_DEVELOPMENT_CHECKS - auto result = parser->unescape_wobbly(in, _string_buf_loc); - SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); - return result; -#else - return parser->unescape_wobbly(in, _string_buf_loc); -#endif +inline simdjson_result simdjson_result::is_empty() noexcept { + if (error()) { return error(); } + return first.is_empty(); } -simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { - SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); - SIMDJSON_ASSUME(_depth == child_depth - 1); -#if SIMDJSON_DEVELOPMENT_CHECKS -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); - SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); -#endif -#endif - token.set_position(position); - _depth = child_depth; +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); } -simdjson_inline error_code json_iterator::consume_character(char c) noexcept { - if (*peek() == c) { - return_current_and_advance(); - return SUCCESS; - } - return TAPE_ERROR; +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); } -#if SIMDJSON_DEVELOPMENT_CHECKS +} // namespace simdjson -simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { - SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); - return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; -} +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +/* end file simdjson/generic/ondemand/object-inl.h for fallback */ +/* including simdjson/generic/ondemand/object_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/object_iterator-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H -simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { - SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); - if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } -} +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#endif +namespace simdjson { +namespace fallback { +namespace ondemand { +// +// object_iterator +// -simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { - SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); - logger::log_error(*this, message); - return _error; +simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result object_iterator::operator*() noexcept { + error_code error = iter.error(); + if (error) { iter.abandon(); return error; } + auto result = field::start(iter); + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (result.error()) { iter.abandon(); } + return result; +} +simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { + return iter.is_open(); } +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline object_iterator &object_iterator::operator++() noexcept { + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error -simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { - // This function is not expected to be called in performance-sensitive settings. - // Let us guard against silly cases: - if((N < max_len) || (N == 0)) { return false; } - // Copy to the buffer. - std::memcpy(tmpbuf, json, max_len); - if(N > max_len) { // We pad whatever remains with ' '. - std::memset(tmpbuf + max_len, ' ', N - max_len); - } - return true; + simdjson_unused error_code error; + if ((error = iter.skip_child() )) { return *this; } + + simdjson_unused bool has_value; + if ((error = iter.has_next_field().get(has_value) )) { return *this; }; + return *this; } +SIMDJSON_POP_DISABLE_WARNINGS + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the object is first found and the iterator is just past the {. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the , or } before the next value. In this state, +// depth == iter.depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter.depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the object iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an +// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter.depth == depth, and at_start == false. +// +// Errors that occur while reading a field to give to the user (such as when the key is not a +// string or the field is missing a colon) are yielded immediately. Depth is then decremented, +// moving to the Finished state without transitioning through an Error state at all. +// +// ## Terminal State +// +// The terminal state has iter.depth < depth. at_start is always false. +// +// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. +// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. +// } // namespace ondemand } // namespace fallback @@ -51105,1516 +58963,1923 @@ simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const ui namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::json_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result( + fallback::ondemand::object_iterator &&value +) noexcept + : implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +// Checks for ']' and ',' +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++first; + return *this; +} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/json_iterator-inl.h for fallback */ -/* including simdjson/generic/ondemand/json_type-inl.h for fallback: #include "simdjson/generic/ondemand/json_type-inl.h" */ -/* begin file simdjson/generic/ondemand/json_type-inl.h for fallback */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/object_iterator-inl.h for fallback */ +/* including simdjson/generic/ondemand/parser-inl.h for fallback: #include "simdjson/generic/ondemand/parser-inl.h" */ +/* begin file simdjson/generic/ondemand/parser-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { namespace ondemand { -inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { - switch (type) { - case json_type::array: out << "array"; break; - case json_type::object: out << "object"; break; - case json_type::number: out << "number"; break; - case json_type::string: out << "string"; break; - case json_type::boolean: out << "boolean"; break; - case json_type::null: out << "null"; break; - default: SIMDJSON_UNREACHABLE(); - } - return out; +simdjson_inline parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity} { } -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { - return out << type.value(); +simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { + if (new_capacity > max_capacity()) { return CAPACITY; } + if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + + // string_capacity copied from document::allocate + _capacity = 0; + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); +#if SIMDJSON_DEVELOPMENT_CHECKS + start_positions.reset(new (std::nothrow) token_position[new_max_depth]); +#endif + if (implementation) { + SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); + SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + } else { + SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); + } + _capacity = new_capacity; + _max_depth = new_max_depth; + return SUCCESS; +} +#if SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { + return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); } #endif +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } + json.remove_utf8_bom(); -simdjson_inline number_type number::get_number_type() const noexcept { - return type; + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return document::start({ reinterpret_cast(json.data()), this }); } -simdjson_inline bool number::is_uint64() const noexcept { - return get_number_type() == number_type::unsigned_integer; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); + if (err) { + if (err != UNCLOSED_STRING) + return err; + } + return document::start({ reinterpret_cast(json.data()), this, true }); } +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -simdjson_inline uint64_t number::get_uint64() const noexcept { - return payload.unsigned_integer; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); } -simdjson_inline number::operator uint64_t() const noexcept { - return get_uint64(); +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); } -simdjson_inline bool number::is_int64() const noexcept { - return get_number_type() == number_type::signed_integer; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { + return iterate(padded_string_view(json, allocated)); } -simdjson_inline int64_t number::get_int64() const noexcept { - return payload.signed_integer; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { + return iterate(pad_with_reserve(json)); } -simdjson_inline number::operator int64_t() const noexcept { - return get_int64(); +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { + return iterate(padded_string_view(json)); } -simdjson_inline bool number::is_double() const noexcept { - return get_number_type() == number_type::floating_point_number; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + padded_string_view json = result.value_unsafe(); + return iterate(json); } -simdjson_inline double number::get_double() const noexcept { - return payload.floating_point_number; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + const padded_string &json = result.value_unsafe(); + return iterate(json); } -simdjson_inline number::operator double() const noexcept { - return get_double(); +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length()) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return json_iterator(reinterpret_cast(json.data()), this); } -simdjson_inline double number::as_double() const noexcept { - if(is_double()) { - return payload.floating_point_number; +inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { + buf += 3; + len -= 3; } - if(is_int64()) { - return double(payload.signed_integer); + if(allow_comma_separated && batch_size < len) { batch_size = len; } + return document_stream(*this, buf, len, batch_size, allow_comma_separated); +} + +inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. + return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(padded_string_view s, size_t batch_size, bool allow_comma_separated) noexcept { + if (!s.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(pad(s), batch_size, allow_comma_separated); +} +simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { + return _capacity; +} +simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; +} +simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { + return _max_depth; +} + +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; } - return double(payload.unsigned_integer); } -simdjson_inline void number::append_s64(int64_t value) noexcept { - payload.signed_integer = value; - type = number_type::signed_integer; +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { + uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; } -simdjson_inline void number::append_u64(uint64_t value) noexcept { - payload.unsigned_integer = value; - type = number_type::unsigned_integer; +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { + uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; } -simdjson_inline void number::append_double(double value) noexcept { - payload.floating_point_number = value; - type = number_type::floating_point_number; +simdjson_inline simdjson_warn_unused ondemand::parser& parser::get_parser() { + return *parser::get_parser_instance(); } -simdjson_inline void number::skip_double() noexcept { - type = number_type::floating_point_number; +simdjson_inline bool release_parser() { + auto &parser_instance = parser::get_threadlocal_parser_if_exists(); + if (parser_instance) { + parser_instance.reset(); + return true; + } + return false; +} + +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_parser_instance() { + std::unique_ptr& parser_instance = get_threadlocal_parser_if_exists(); + if (!parser_instance) { + parser_instance.reset(new ondemand::parser()); + } + return parser_instance; +} + +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_threadlocal_parser_if_exists() { + // @the-moisrex points out that this could be implemented with std::optional (C++17). + thread_local std::unique_ptr parser_instance = nullptr; + return parser_instance; } + } // namespace ondemand } // namespace fallback } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::json_type &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H -/* end file simdjson/generic/ondemand/json_type-inl.h for fallback */ -/* including simdjson/generic/ondemand/logger-inl.h for fallback: #include "simdjson/generic/ondemand/logger-inl.h" */ -/* begin file simdjson/generic/ondemand/logger-inl.h for fallback */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H +/* end file simdjson/generic/ondemand/parser-inl.h for fallback */ +/* including simdjson/generic/ondemand/raw_json_string-inl.h for fallback: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include -#include - namespace simdjson { + namespace fallback { namespace ondemand { -namespace logger { - -static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; -static constexpr const int LOG_EVENT_LEN = 20; -static constexpr const int LOG_BUFFER_LEN = 30; -static constexpr const int LOG_SMALL_BUFFER_LEN = 10; -static int log_depth = 0; // Not threadsafe. Log only. - -// Helper to turn unprintable or newline characters into spaces -static inline char printable_char(char c) { - if (c >= 0x20) { - return c; - } else { - return ' '; - } -} - -template -static inline std::string string_format(const std::string& format, const Args&... args) -{ - SIMDJSON_PUSH_DISABLE_ALL_WARNINGS - int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; - auto size = static_cast(size_s); - if (size <= 0) return std::string(); - std::unique_ptr buf(new char[size]); - std::snprintf(buf.get(), size, format.c_str(), args...); - SIMDJSON_POP_DISABLE_WARNINGS - return std::string(buf.get(), buf.get() + size - 1); -} - -static inline log_level get_log_level_from_env() -{ - SIMDJSON_PUSH_DISABLE_WARNINGS - SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe - char *lvl = getenv("SIMDJSON_LOG_LEVEL"); - SIMDJSON_POP_DISABLE_WARNINGS - if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } - return log_level::info; -} - -static inline log_level log_threshold() -{ - static log_level threshold = get_log_level_from_env(); - return threshold; -} -static inline bool should_log(log_level level) -{ - return level >= log_threshold(); -} +simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} -inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +simdjson_inline const char * raw_json_string::raw() const noexcept { + return reinterpret_cast(buf); } -inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { - log_line(iter, index, depth, "", type, detail, log_level::info); -} -inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +simdjson_inline char raw_json_string::operator[](size_t i) const noexcept { + return reinterpret_cast(buf)[i]; } -inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { - log_line(iter, index, depth, "+", type, detail, log_level::info); - if (LOG_ENABLED) { log_depth++; } -} -inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_line(iter, "+", type, "", delta, depth_delta, log_level::info); - if (LOG_ENABLED) { log_depth++; } +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { + size_t pos{0}; + while(pos < target.size()) { + pos = target.find('"', pos); + if(pos == std::string_view::npos) { return true; } + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } + } + pos++; + } + return true; } -inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - if (LOG_ENABLED) { log_depth--; } - log_line(iter, "-", type, "", delta, depth_delta, log_level::info); +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { + size_t pos{0}; + while(target[pos]) { + const char * result = strchr(target+pos, '"'); + if(result == nullptr) { return true; } + pos = result - target; + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } + } + pos++; + } + return true; } -inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { - log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); -} -inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { - log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); -} -inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_event(iter.json_iter(), type, detail, delta, depth_delta); +simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { + // If we are going to call memcmp, then we must know something about the length of the raw_json_string. + return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); } -inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_value(iter.json_iter(), type, detail, delta, depth_delta); +simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { + // Assumptions: does not contain unescaped quote characters("), and + // the raw content is quote terminated within a valid JSON string. + if(target.size() <= SIMDJSON_PADDING) { + return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + } + const char * r{raw()}; + size_t pos{0}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; } -inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_start_value(iter.json_iter(), type, delta, depth_delta); +simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; } -inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_end_value(iter.json_iter(), type, delta, depth_delta); -} -inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { - log_error(iter.json_iter(), error, detail, delta, depth_delta); +simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { + // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; } -inline void log_headers() noexcept { - if (LOG_ENABLED) { - if (simdjson_unlikely(should_log(log_level::info))) { - // Technically a static variable is not thread-safe, but if you are using threads and logging... well... - static bool displayed_hint{false}; - log_depth = 0; - printf("\n"); - if (!displayed_hint) { - // We only print this helpful header once. - printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); - printf("# +array says 'this is where we were when we discovered the start array'\n"); - printf( - "# -array says 'this is where we were when we ended the array'\n"); - printf("# skip says 'this is a structural or value I am skipping'\n"); - printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); - printf("#\n"); - printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); - printf("# in addition to the depth being displayed.\n"); - printf("#\n"); - printf("# Every token in the document has a single depth determined by the tokens before it,\n"); - printf("# and is not affected by what the token actually is.\n"); - printf("#\n"); - printf("# Not all structural elements are presented as tokens in the logs.\n"); - printf("#\n"); - printf("# We never give control to the user within an empty array or an empty object.\n"); - printf("#\n"); - printf("# Inside an array, having a depth greater than the array's depth means that\n"); - printf("# we are pointing inside a value.\n"); - printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); - printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); - displayed_hint = true; - } - printf("\n"); - printf("| %-*s ", LOG_EVENT_LEN, "Event"); - printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); - printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); - // printf("| %-*s ", 5, "Next#"); - printf("| %-*s ", 5, "Depth"); - printf("| Detail "); - printf("|\n"); - - printf("|%.*s", LOG_EVENT_LEN + 2, DASHES); - printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); - printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); - // printf("|%.*s", 5+2, DASHES); - printf("|%.*s", 5 + 2, DASHES); - printf("|--------"); - printf("|\n"); - fflush(stdout); +simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; } } + if(r[pos] != '"') { return false; } + return true; } -template -inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { - log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); -} - -template -inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept { - if (LOG_ENABLED) { - if (simdjson_unlikely(should_log(level))) { - const int indent = depth * 2; - const auto buf = iter.token.buf; - auto msg = string_format(title, std::forward(args)...); - printf("| %*s%s%-*s ", indent, "", title_prefix, - LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); - { - // Print the current structural. - printf("| "); - // Before we begin, the index might point right before the document. - // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 - if (index < iter._root) { - printf("%*s", LOG_BUFFER_LEN, ""); - } else { - auto current_structural = &buf[*index]; - for (int i = 0; i < LOG_BUFFER_LEN; i++) { - printf("%c", printable_char(current_structural[i])); - } - } - printf(" "); - } - { - // Print the next structural. - printf("| "); - auto next_structural = &buf[*(index + 1)]; - for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { - printf("%c", printable_char(next_structural[i])); - } - printf(" "); - } - // printf("| %5u ", *(index+1)); - printf("| %5i ", depth); - printf("| %6.*s ", int(detail.size()), detail.data()); - printf("|\n"); - fflush(stdout); +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { + return a.unsafe_is_equal(c); +} + +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { + return a == c; +} + +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { + return !(a == c); +} + +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { + return !(a == c); +} + + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { + return iter.unescape(*this, allow_replacement); +} + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { + return iter.unescape_wobbly(*this); +} + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { + bool in_escape = false; + const char *s = str.raw(); + while (true) { + switch (*s) { + case '\\': in_escape = !in_escape; break; + case '"': if (in_escape) { in_escape = false; } else { return out; } break; + default: if (in_escape) { in_escape = false; } } + out << *s; + s++; } } -} // namespace logger } // namespace ondemand } // namespace fallback } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H -/* end file simdjson/generic/ondemand/logger-inl.h for fallback */ -/* including simdjson/generic/ondemand/object-inl.h for fallback: #include "simdjson/generic/ondemand/object-inl.h" */ -/* begin file simdjson/generic/ondemand/object-inl.h for fallback */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::raw() const noexcept { + if (error()) { return error(); } + return first.raw(); +} +simdjson_inline char simdjson_result::operator[](size_t i) const noexcept { + if (error()) { return error(); } + return first[i]; +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(fallback::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { + if (error()) { return error(); } + return first.unescape(iter, allow_replacement); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(fallback::ondemand::json_iterator &iter) const noexcept { + if (error()) { return error(); } + return first.unescape_wobbly(iter); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H +/* end file simdjson/generic/ondemand/raw_json_string-inl.h for fallback */ +/* including simdjson/generic/ondemand/serialization-inl.h for fallback: #include "simdjson/generic/ondemand/serialization-inl.h" */ +/* begin file simdjson/generic/ondemand/serialization-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_builder.h" */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace fallback { -namespace ondemand { -simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; - } - return value(iter.child()); -} -simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; - } - return value(iter.child()); -} -simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { - return find_field_unordered(key); +inline std::string_view trim(const std::string_view str) noexcept { + // We can almost surely do better by rolling our own find_first_not_of function. + size_t first = str.find_first_not_of(" \t\n\r"); + // If we have the empty string (just white space), then no trimming is possible, and + // we return the empty string_view. + if (std::string_view::npos == first) { return std::string_view(); } + size_t last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, (last - first + 1)); } -simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { - return std::forward(*this).find_field_unordered(key); + + +inline simdjson_result to_json_string(fallback::ondemand::document& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; - } - return value(iter.child()); + +inline simdjson_result to_json_string(fallback::ondemand::document_reference& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; + +inline simdjson_result to_json_string(fallback::ondemand::value& x) noexcept { + /** + * If we somehow receive a value that has already been consumed, + * then the following code could be in trouble. E.g., we create + * an array as needed, but if an array was already created, then + * it could be bad. + */ + using namespace fallback::ondemand; + fallback::ondemand::json_type t; + auto error = x.type().get(t); + if(error != SUCCESS) { return error; } + switch (t) + { + case json_type::array: + { + fallback::ondemand::array array; + error = x.get_array().get(array); + if(error) { return error; } + return to_json_string(array); + } + case json_type::object: + { + fallback::ondemand::object object; + error = x.get_object().get(object); + if(error) { return error; } + return to_json_string(object); + } + default: + return trim(x.raw_json_token()); } - return value(iter.child()); } -simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { - SIMDJSON_TRY( iter.start_object().error() ); - return object(iter); +inline simdjson_result to_json_string(fallback::ondemand::object& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { - SIMDJSON_TRY( iter.start_root_object().error() ); - return object(iter); + +inline simdjson_result to_json_string(fallback::ondemand::array& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline error_code object::consume() noexcept { - if(iter.is_at_key()) { - /** - * whenever you are pointing at a key, calling skip_child() is - * unsafe because you will hit a string and you will assume that - * it is string value, and this mistake will lead you to make bad - * depth computation. - */ - /** - * We want to 'consume' the key. We could really - * just do _json_iter->return_current_and_advance(); at this - * point, but, for clarity, we will use the high-level API to - * eat the key. We assume that the compiler optimizes away - * most of the work. - */ - simdjson_unused raw_json_string actual_key; - auto error = iter.field_key().get(actual_key); - if (error) { iter.abandon(); return error; }; - // Let us move to the value while we are at it. - if ((error = iter.field_value())) { iter.abandon(); return error; } - } - auto error_skip = iter.json_iter().skip_child(iter.depth()-1); - if(error_skip) { iter.abandon(); } - return error_skip; + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline simdjson_result object::raw_json() noexcept { - const uint8_t * starting_point{iter.peek_start()}; - auto error = consume(); - if(error) { return error; } - const uint8_t * final_point{iter._json_iter->peek()}; - return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { - SIMDJSON_TRY( iter.started_object().error() ); - return object(iter); +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline object object::resume(const value_iterator &iter) noexcept { - return iter; +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline object::object(const value_iterator &_iter) noexcept - : iter{_iter} -{ +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } +} // namespace simdjson -simdjson_inline simdjson_result object::begin() noexcept { -#if SIMDJSON_DEVELOPMENT_CHECKS - if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } -#endif - return object_iterator(iter); +namespace simdjson { namespace fallback { namespace ondemand { + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } } -simdjson_inline simdjson_result object::end() noexcept { - return object_iterator(iter); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } } +#endif -inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { - if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } - json_pointer = json_pointer.substr(1); - size_t slash = json_pointer.find('/'); - std::string_view key = json_pointer.substr(0, slash); - // Grab the child with the given key - simdjson_result child; +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif - // If there is an escape character in the key, unescape it and then get the child. - size_t escape = key.find('~'); - if (escape != std::string_view::npos) { - // Unescape the key - std::string unescaped(key); - do { - switch (unescaped[escape+1]) { - case '0': - unescaped.replace(escape, 2, "~"); - break; - case '1': - unescaped.replace(escape, 2, "/"); - break; - default: - return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); - } - escape = unescaped.find('~', escape+1); - } while (escape != std::string::npos); - child = find_field(unescaped); // Take note find_field does not unescape keys when matching +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); } else { - child = find_field(key); + throw simdjson::simdjson_error(error); } - if(child.error()) { - return child; // we do not continue if there was an error +} +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::document_reference& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); } - // If there is a /, we have to recurse and look up more of the path - if (slash != std::string_view::npos) { - child = child.at_pointer(json_pointer.substr(slash)); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); } - return child; } +#endif -inline simdjson_result object::at_path(std::string_view json_path) noexcept { - auto json_pointer = json_path_to_pointer_conversion(json_path); - if (json_pointer == "-1") { - return INVALID_JSON_POINTER; +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); } - return at_pointer(json_pointer); } - -simdjson_inline simdjson_result object::count_fields() & noexcept { - size_t count{0}; - // Important: we do not consume any of the values. - for(simdjson_unused auto v : *this) { count++; } - // The above loop will always succeed, but we want to report errors. - if(iter.error()) { return iter.error(); } - // We need to move back at the start because we expect users to iterate through - // the object after counting the number of elements. - iter.reset_object(); - return count; +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); } - -simdjson_inline simdjson_result object::is_empty() & noexcept { - bool is_not_empty; - auto error = iter.reset_object().get(is_not_empty); - if(error) { return error; } - return !is_not_empty; +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } } +#endif +}}} // namespace simdjson::fallback::ondemand -simdjson_inline simdjson_result object::reset() & noexcept { - return iter.reset_object(); -} +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +/* end file simdjson/generic/ondemand/serialization-inl.h for fallback */ +/* including simdjson/generic/ondemand/token_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/token_iterator-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H -} // namespace ondemand -} // namespace fallback -} // namespace simdjson +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { +namespace fallback { +namespace ondemand { -simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::object &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline token_iterator::token_iterator( + const uint8_t *_buf, + token_position position +) noexcept : buf{_buf}, _position{position} +{ +} -simdjson_inline simdjson_result simdjson_result::begin() noexcept { - if (error()) { return error(); } - return first.begin(); +simdjson_inline uint32_t token_iterator::current_offset() const noexcept { + return *(_position); } -simdjson_inline simdjson_result simdjson_result::end() noexcept { - if (error()) { return error(); } - return first.end(); + + +simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { + return &buf[*(_position++)]; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); + +simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { + return &buf[*position]; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first).find_field_unordered(key); +simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { + return *position; } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { - if (error()) { return error(); } - return first[key]; +simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { + return *(position+1) - *position; } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first)[key]; + +simdjson_inline uint32_t token_iterator::peek_root_length(token_position position) const noexcept { + return *(position+2) - *(position) > *(position+1) - *(position) ? + *(position+1) - *(position) + : *(position+2) - *(position); } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); +simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { + return &buf[*(_position+delta)]; } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first).find_field(key); +simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { + return *(_position+delta); +} +simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { + return *(_position+delta+1) - *(_position+delta); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); +simdjson_inline token_position token_iterator::position() const noexcept { + return _position; +} +simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { + _position = target_position; } -simdjson_inline simdjson_result simdjson_result::at_path( - std::string_view json_path) noexcept { - if (error()) { - return error(); - } - return first.at_path(json_path); +simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { + return _position == other._position; +} +simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { + return _position != other._position; +} +simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { + return _position > other._position; +} +simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { + return _position >= other._position; } - -inline simdjson_result simdjson_result::reset() noexcept { - if (error()) { return error(); } - return first.reset(); +simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { + return _position < other._position; } - -inline simdjson_result simdjson_result::is_empty() noexcept { - if (error()) { return error(); } - return first.is_empty(); +simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { + return _position <= other._position; } -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { - if (error()) { return error(); } - return first.count_fields(); -} +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} -simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { - if (error()) { return error(); } - return first.raw_json(); -} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H -/* end file simdjson/generic/ondemand/object-inl.h for fallback */ -/* including simdjson/generic/ondemand/object_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/object_iterator-inl.h for fallback */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/token_iterator-inl.h for fallback */ +/* including simdjson/generic/ondemand/value_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/value_iterator-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace fallback { namespace ondemand { -// -// object_iterator -// - -simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept - : iter{_iter} -{} +simdjson_inline value_iterator::value_iterator( + json_iterator *json_iter, + depth_t depth, + token_position start_position +) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} +{ +} -simdjson_inline simdjson_result object_iterator::operator*() noexcept { - error_code error = iter.error(); - if (error) { iter.abandon(); return error; } - auto result = field::start(iter); - // TODO this is a safety rail ... users should exit loops as soon as they receive an error. - // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. - if (result.error()) { iter.abandon(); } - return result; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_object(); } -simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { - return !(*this != other); + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_root_object(); } -simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { - return iter.is_open(); + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { + assert_at_container_start(); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + if (*_json_iter->peek() == '}') { + logger::log_value(*_json_iter, "empty object"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY(end_container()); + return false; + } + return true; } -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_inline object_iterator &object_iterator::operator++() noexcept { - // TODO this is a safety rail ... users should exit loops as soon as they receive an error. - // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. - if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // object: e.g., `{"a":2} foo }`. Users concerned with garbage content should + // call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != '}') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); + } + // If the last character is } *and* the first gibberish character is also '}' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed object. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} - simdjson_unused error_code error; - if ((error = iter.skip_child() )) { return *this; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { + auto error = check_root_object(); + if(error) { return error; } + return started_object(); +} - simdjson_unused bool has_value; - if ((error = iter.has_next_field().get(has_value) )) { return *this; }; - return *this; +simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { +#if SIMDJSON_CHECK_EOF + if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } + // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + _json_iter->ascend_to(depth()-1); + return SUCCESS; } -SIMDJSON_POP_DISABLE_WARNINGS -// -// ### Live States -// -// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is -// always SUCCESS: -// -// - Start: This is the state when the object is first found and the iterator is just past the {. -// In this state, at_start == true. -// - Next: After we hand a scalar value to the user, or an array/object which they then fully -// iterate over, the iterator is at the , or } before the next value. In this state, -// depth == iter.depth, at_start == false, and error == SUCCESS. -// - Unfinished Business: When we hand an array/object to the user which they do not fully -// iterate over, we need to finish that iteration by skipping child values until we reach the -// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. -// -// ## Error States -// -// In error states, we will yield exactly one more value before stopping. iter.depth == depth -// and at_start is always false. We decrement after yielding the error, moving to the Finished -// state. -// -// - Chained Error: When the object iterator is part of an error chain--for example, in -// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an -// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and -// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. -// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, -// we flag that as an error and treat it exactly the same as a Chained Error. In this state, -// error == TAPE_ERROR, iter.depth == depth, and at_start == false. -// -// Errors that occur while reading a field to give to the user (such as when the key is not a -// string or the field is missing a colon) are yielded immediately. Depth is then decremented, -// moving to the Finished state without transitioning through an Error state at all. -// -// ## Terminal State -// -// The terminal state has iter.depth < depth. at_start is always false. -// -// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. -// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. -// +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { + assert_at_next(); -} // namespace ondemand -} // namespace fallback -} // namespace simdjson + // It's illegal to call this unless there are more tokens: anything that ends in } or ] is + // obligated to verify there are more tokens if they are not the top level. + switch (*_json_iter->return_current_and_advance()) { + case '}': + logger::log_end_value(*_json_iter, "object"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between object fields"); + } +} -namespace simdjson { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { + error_code error; + bool has_value; + // + // Initially, the object can be in one of a few different places: + // + // 1. The start of the object, at the first field: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + if (at_first_field()) { + has_value = true; -simdjson_inline simdjson_result::simdjson_result( - fallback::ondemand::object_iterator &&value -) noexcept - : implementation_simdjson_result_base(std::forward(value)) -{ - first.iter.assert_is_valid(); -} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base({}, error) -{ -} + // + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + return false; -simdjson_inline simdjson_result simdjson_result::operator*() noexcept { - if (error()) { return error(); } - return *first; -} -// If we're iterating and there is an error, return the error once. -simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return !error(); } - return first == other.first; -} -// If we're iterating and there is an error, return the error once. -simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return error(); } - return first != other.first; -} -// Checks for ']' and ',' -simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { - // Clear the error if there is one, so we don't yield it twice - if (error()) { second = SUCCESS; return *this; } - ++first; - return *this; -} + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + if ((error = skip_child() )) { abandon(); return error; } + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + while (has_value) { + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + //if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } -} // namespace simdjson + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); // Skip the value entirely + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } -#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/object_iterator-inl.h for fallback */ -/* including simdjson/generic/ondemand/parser-inl.h for fallback: #include "simdjson/generic/ondemand/parser-inl.h" */ -/* begin file simdjson/generic/ondemand/parser-inl.h for fallback */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H + // If the loop ended, we're out of fields to look at. + return false; +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { + /** + * When find_field_unordered_raw is called, we can either be pointing at the + * first key, pointing outside (at the closing brace) or if a key was matched + * we can be either pointing right afterthe ':' right before the value (that we need skip), + * or we may have consumed the value and we might be at a comma or at the + * final brace (ready for a call to has_next_field()). + */ + error_code error; + bool has_value; -namespace simdjson { -namespace fallback { -namespace ondemand { + // First, we scan from that point to the end. + // If we don't find a match, we may loop back around, and scan from the beginning to that point. + token_position search_start = _json_iter->position(); -simdjson_inline parser::parser(size_t max_capacity) noexcept - : _max_capacity{max_capacity} { -} + // We want to know whether we need to go back to the beginning. + bool at_first = at_first_field(); + /////////////// + // Initially, the object can be in one of a few different places: + // + // 1. At the first key: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + // + if (at_first) { + has_value = true; -simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { - if (new_capacity > max_capacity()) { return CAPACITY; } - if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { - // string_capacity copied from document::allocate - _capacity = 0; - size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); - string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); #if SIMDJSON_DEVELOPMENT_CHECKS - start_positions.reset(new (std::nothrow) token_position[new_max_depth]); + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } #endif - if (implementation) { - SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); - SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + SIMDJSON_TRY(reset_object().get(has_value)); + at_first = true; + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // } else { - SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); - } - _capacity = new_capacity; - _max_depth = new_max_depth; - return SUCCESS; -} + // If someone queried a key but they not did access the value, then we are left pointing + // at the ':' and we need to move forward through the value... If the value was + // processed then skip_child() does not move the iterator (but may adjust the depth). + if ((error = skip_child() )) { abandon(); return error; } + search_start = _json_iter->position(); + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } #if SIMDJSON_DEVELOPMENT_CHECKS -simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { - return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); -} + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } #endif + } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + // After initial processing, we will be in one of two states: + // + // ``` + // // At the beginning of a field + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // At the end of the object + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // ``` + // + // Next, we find a match starting from the current position. + while (has_value) { + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + // Performance note: it maybe wasteful to rewind to the beginning when there might be + // no other query following. Indeed, it would require reskipping the whole object. + // Instead, you can just stay where you are. If there is a new query, there is always time + // to rewind. + if(at_first) { return false; } + + // If we reach the end without finding a match, search the rest of the fields starting at the + // beginning of the object. + // (We have already run through the object before, so we've already validated its structure. We + // don't check errors in this bit.) + SIMDJSON_TRY(reset_object().get(has_value)); + while (true) { + SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + error = field_value(); SIMDJSON_ASSUME(!error); - json.remove_utf8_bom(); + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } - // Allocate if needed - if (capacity() < json.length() || !string_buf) { - SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // If we reached the end of the key-value pair we started from, then we know + // that the key is not there so we return false. We are either right before + // the next comma or the final brace. + if(_json_iter->position() == search_start) { return false; } + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); + // If we make the mistake of exiting here, then we could be left pointing at a key + // in the middle of an object. That's not an allowable state. } - - // Run stage 1. - SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); - return document::start({ reinterpret_cast(json.data()), this }); + // If the loop ended, we're out of fields to look at. The program should + // never reach this point. + return false; } +SIMDJSON_POP_DISABLE_WARNINGS -#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { + assert_at_next(); - json.remove_utf8_bom(); + const uint8_t *key = _json_iter->return_current_and_advance(); + if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } + return raw_json_string(key); +} - // Allocate if needed - if (capacity() < json.length() || !string_buf) { - SIMDJSON_TRY( allocate(json.length(), max_depth()) ); - } +simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { + assert_at_next(); - // Run stage 1. - const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); - if (err) { - if (err != UNCLOSED_STRING) - return err; - } - return document::start({ reinterpret_cast(json.data()), this, true }); + if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } + _json_iter->descend_to(depth()+1); + return SUCCESS; } -#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { - return iterate(padded_string_view(json, len, allocated)); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_array(); } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { - return iterate(padded_string_view(json, len, allocated)); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_root_array(); } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { - return iterate(padded_string_view(json, allocated)); +inline std::string value_iterator::to_string() const noexcept { + auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); + if(_json_iter != nullptr) { answer += _json_iter->to_string(); } + answer += std::string(" ]"); + return answer; } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { - if(json.capacity() - json.size() < SIMDJSON_PADDING) { - json.reserve(json.size() + SIMDJSON_PADDING); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { + assert_at_container_start(); + if (*_json_iter->peek() == ']') { + logger::log_value(*_json_iter, "empty array"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY( end_container() ); + return false; } - return iterate(padded_string_view(json)); -} - -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { - return iterate(padded_string_view(json)); + _json_iter->descend_to(depth()+1); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + return true; } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { - // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception - SIMDJSON_TRY( result.error() ); - padded_string_view json = result.value_unsafe(); - return iterate(json); +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should + // also call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != ']') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); + } + // If the last character is ] *and* the first gibberish character is also ']' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed array. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { - // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception - SIMDJSON_TRY( result.error() ); - const padded_string &json = result.value_unsafe(); - return iterate(json); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { + auto error = check_root_array(); + if (error) { return error; } + return started_array(); } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } - - json.remove_utf8_bom(); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { + assert_at_next(); - // Allocate if needed - if (capacity() < json.length()) { - SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + logger::log_event(*this, "has_next_element"); + switch (*_json_iter->return_current_and_advance()) { + case ']': + logger::log_end_value(*_json_iter, "array"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + _json_iter->descend_to(depth()+1); + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between array elements"); } +} - // Run stage 1. - SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); - return json_iterator(reinterpret_cast(json.data()), this); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { + auto not_true = atomparsing::str4ncmp(json, "true"); + auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); + bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 5 : 4]); + if (error) { return incorrect_type_error("Not a boolean"); } + return simdjson_result(!not_true); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { + bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); + // if we start with 'n', we must be a null + if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } + return is_null_string; } -inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { - if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } - if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { - buf += 3; - len -= 3; - } - if(allow_comma_separated && batch_size < len) { batch_size = len; } - return document_stream(*this, buf, len, batch_size, allow_comma_separated); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { + return get_raw_json_string().unescape(json_iter(), allow_replacement); } -inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_string(allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; } -inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { + return get_raw_json_string().unescape_wobbly(json_iter()); } -inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + advance_scalar("string"); + return raw_json_string(json+1); } - -simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { - return _capacity; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { + auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; } -simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept { - return _max_capacity; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { + auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; } -simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { - return _max_depth; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { + auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; } - -simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { - if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { - _max_capacity = max_capacity; - } else { - _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; - } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { + auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; } - -simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { - uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); - if (!end) { return STRING_ERROR; } - std::string_view result(reinterpret_cast(dst), end-dst); - dst = end; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { + auto result = numberparsing::parse_double(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } return result; } - -simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { - uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); - if (!end) { return STRING_ERROR; } - std::string_view result(reinterpret_cast(dst), end-dst); - dst = end; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { + auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } return result; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { + auto result = parse_bool(peek_non_root_scalar("bool")); + if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } + return result; +} +simdjson_inline simdjson_result value_iterator::is_null() noexcept { + bool is_null_value; + SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); + if(is_null_value) { advance_non_root_scalar("null"); } + return is_null_value; +} +simdjson_inline bool value_iterator::is_negative() noexcept { + return numberparsing::is_negative(peek_non_root_scalar("numbersign")); +} +simdjson_inline bool value_iterator::is_root_negative() noexcept { + return numberparsing::is_negative(peek_root_scalar("numbersign")); +} +simdjson_inline simdjson_result value_iterator::is_integer() noexcept { + return numberparsing::is_integer(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { + return numberparsing::get_number_type(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number() noexcept { + number num; + error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); + if(error) { return error; } + return num; +} -} // namespace ondemand -} // namespace fallback -} // namespace simdjson - -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::parser &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H -/* end file simdjson/generic/ondemand/parser-inl.h for fallback */ -/* including simdjson/generic/ondemand/raw_json_string-inl.h for fallback: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ -/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for fallback */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { - -namespace fallback { -namespace ondemand { - -simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} - -simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } - +simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("is_root_integer"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + return false; // if there are more than 20 characters, it cannot be represented as an integer. + } + auto answer = numberparsing::is_integer(tmpbuf); + // If the parsing was a success, we must still check that it is + // a single scalar. Note that we parse first because of cases like '[]' where + // getting TRAILING_CONTENT is wrong. + if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } + return answer; +} -simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { - size_t pos{0}; - while(pos < target.size()) { - pos = target.find('"', pos); - if(pos == std::string_view::npos) { return true; } - if(pos != 0 && target[pos-1] != '\\') { return false; } - if(pos > 1 && target[pos-2] == '\\') { - size_t backslash_count{2}; - for(size_t i = 3; i <= pos; i++) { - if(target[pos-i] == '\\') { backslash_count++; } - else { break; } - } - if(backslash_count % 2 == 0) { return false; } +simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return number_type::big_integer; } - pos++; + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; } - return true; + auto answer = numberparsing::get_number_type(tmpbuf); + if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + return answer; } - -simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { - size_t pos{0}; - while(target[pos]) { - const char * result = strchr(target+pos, '"'); - if(result == nullptr) { return true; } - pos = result - target; - if(pos != 0 && target[pos-1] != '\\') { return false; } - if(pos > 1 && target[pos-2] == '\\') { - size_t backslash_count{2}; - for(size_t i = 3; i <= pos; i++) { - if(target[pos-i] == '\\') { backslash_count++; } - else { break; } - } - if(backslash_count % 2 == 0) { return false; } +simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + // NOTE: the current approach doesn't work for very big integer numbers containing more than 1074 digits. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return BIGINT_ERROR; } - pos++; + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; } - return true; + number num; + error_code error = numberparsing::parse_number(tmpbuf, num); + if(error) { return error; } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("number"); + return num; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { + return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_root_string(check_trailing, allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { + return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } - -simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { - // If we are going to call memcmp, then we must know something about the length of the raw_json_string. - return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + auto result = numberparsing::parse_integer(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } -simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { - // Assumptions: does not contain unescaped quote characters("), and - // the raw content is quote terminated within a valid JSON string. - if(target.size() <= SIMDJSON_PADDING) { - return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + auto result = numberparsing::parse_integer_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; } - const char * r{raw()}; - size_t pos{0}; - for(;pos < target.size();pos++) { - if(r[pos] != target[pos]) { return false; } + auto result = numberparsing::parse_double(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); } - if(r[pos] != '"') { return false; } - return true; + return result; } -simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { - const char * r{raw()}; - size_t pos{0}; - bool escaping{false}; - for(;pos < target.size();pos++) { - if(r[pos] != target[pos]) { return false; } - // if target is a compile-time constant and it is free from - // quotes, then the next part could get optimized away through - // inlining. - if((target[pos] == '"') && !escaping) { - // We have reached the end of the raw_json_string but - // the target is not done. - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; - } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; } - if(r[pos] != '"') { return false; } - return true; + auto result = numberparsing::parse_double_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; } - -simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { - // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and - // the raw content is quote terminated within a valid JSON string. - const char * r{raw()}; - size_t pos{0}; - for(;target[pos];pos++) { - if(r[pos] != target[pos]) { return false; } - } - if(r[pos] != '"') { return false; } - return true; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("bool"); + // We have a boolean if we have either "true" or "false" and the next character is either + // a structural character or whitespace. We also check that the length is correct: + // "true" and "false" are 4 and 5 characters long, respectively. + bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && + (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + return value_true; } -simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { - // Assumptions: does not contain unescaped quote characters, and - // the raw content is quote terminated within a valid JSON string. - const char * r{raw()}; - size_t pos{0}; - bool escaping{false}; - for(;target[pos];pos++) { - if(r[pos] != target[pos]) { return false; } - // if target is a compile-time constant and it is free from - // quotes, then the next part could get optimized away through - // inlining. - if((target[pos] == '"') && !escaping) { - // We have reached the end of the raw_json_string but - // the target is not done. - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; - } +simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("null"); + bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + if(result) { // we have something that looks like a null. + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("null"); + } else if (json[0] == 'n') { + return incorrect_type_error("Not a null but starts with n"); } - if(r[pos] != '"') { return false; } - return true; + return result; } -simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { - return a.unsafe_is_equal(c); -} +simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); -simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { - return a == c; + return _json_iter->skip_child(depth()); } -simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { - return !(a == c); +simdjson_inline value_iterator value_iterator::child() const noexcept { + assert_at_child(); + return { _json_iter, depth()+1, _json_iter->token.position() }; } -simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { - return !(a == c); +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is +// marked non-inline. +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline bool value_iterator::is_open() const noexcept { + return _json_iter->depth() >= depth(); } +SIMDJSON_POP_DISABLE_WARNINGS - -simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { - return iter.unescape(*this, allow_replacement); +simdjson_inline bool value_iterator::at_end() const noexcept { + return _json_iter->at_end(); } -simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { - return iter.unescape_wobbly(*this); +simdjson_inline bool value_iterator::at_start() const noexcept { + return _json_iter->token.position() == start_position(); } -simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { - bool in_escape = false; - const char *s = str.raw(); - while (true) { - switch (*s) { - case '\\': in_escape = !in_escape; break; - case '"': if (in_escape) { in_escape = false; } else { return out; } break; - default: if (in_escape) { in_escape = false; } - } - out << *s; - s++; - } +simdjson_inline bool value_iterator::at_first_field() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + return _json_iter->token.position() == start_position() + 1; } -} // namespace ondemand -} // namespace fallback -} // namespace simdjson - -namespace simdjson { +simdjson_inline void value_iterator::abandon() noexcept { + _json_iter->abandon(); +} -simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::raw_json_string &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { + return _depth; +} +simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { + return _json_iter->error; +} +simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { + return _json_iter->string_buf_loc(); +} +simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { + return *_json_iter; +} +simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { + return *_json_iter; +} -simdjson_inline simdjson_result simdjson_result::raw() const noexcept { - if (error()) { return error(); } - return first.raw(); +simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { + return _json_iter->peek(start_position()); } -simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(fallback::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { - if (error()) { return error(); } - return first.unescape(iter, allow_replacement); +simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { + return _json_iter->peek_length(start_position()); } -simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(fallback::ondemand::json_iterator &iter) const noexcept { - if (error()) { return error(); } - return first.unescape_wobbly(iter); +simdjson_inline uint32_t value_iterator::peek_root_length() const noexcept { + return _json_iter->peek_root_length(start_position()); } -} // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H -/* end file simdjson/generic/ondemand/raw_json_string-inl.h for fallback */ -/* including simdjson/generic/ondemand/serialization-inl.h for fallback: #include "simdjson/generic/ondemand/serialization-inl.h" */ -/* begin file simdjson/generic/ondemand/serialization-inl.h for fallback */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return peek_start(); } -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + return _json_iter->peek(); +} -namespace simdjson { +simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return; } -inline std::string_view trim(const std::string_view str) noexcept { - // We can almost surely do better by rolling our own find_first_not_of function. - size_t first = str.find_first_not_of(" \t\n\r"); - // If we have the empty string (just white space), then no trimming is possible, and - // we return the empty string_view. - if (std::string_view::npos == first) { return std::string_view(); } - size_t last = str.find_last_not_of(" \t\n\r"); - return str.substr(first, (last - first + 1)); + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); } +simdjson_warn_unused simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { + logger::log_start_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + const uint8_t *json; + if (!is_at_start()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + json = peek_start(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + } else { + assert_at_start(); + /** + * We should be prudent. Let us peek. If it is not the right type, we + * return an error. Only once we have determined that we have the right + * type are we allowed to advance! + */ + json = _json_iter->peek(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + _json_iter->return_current_and_advance(); + } -inline simdjson_result to_json_string(fallback::ondemand::document& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); -} -inline simdjson_result to_json_string(fallback::ondemand::document_reference& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); + return SUCCESS; } -inline simdjson_result to_json_string(fallback::ondemand::value& x) noexcept { - /** - * If we somehow receive a value that has already been consumed, - * then the following code could be in trouble. E.g., we create - * an array as needed, but if an array was already created, then - * it could be bad. - */ - using namespace fallback::ondemand; - fallback::ondemand::json_type t; - auto error = x.type().get(t); - if(error != SUCCESS) { return error; } - switch (t) - { - case json_type::array: - { - fallback::ondemand::array array; - error = x.get_array().get(array); - if(error) { return error; } - return to_json_string(array); - } - case json_type::object: - { - fallback::ondemand::object object; - error = x.get_object().get(object); - if(error) { return error; } - return to_json_string(object); - } - default: - return trim(x.raw_json_token()); - } -} -inline simdjson_result to_json_string(fallback::ondemand::object& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); -} +simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } -inline simdjson_result to_json_string(fallback::ondemand::array& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); + assert_at_root(); + return _json_iter->peek(); } +simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); + assert_at_non_root_start(); + return _json_iter->peek(); } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); -} +simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); + assert_at_root(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); } +simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); + assert_at_non_root_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); +simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { + logger::log_error(*_json_iter, start_position(), depth(), message); + return INCORRECT_TYPE; } -} // namespace simdjson - -namespace simdjson { namespace fallback { namespace ondemand { -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::value x) { - std::string_view v; - auto error = simdjson::to_json_string(x).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::value x) { - std::string_view v; - auto error = simdjson::to_json_string(x).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } +simdjson_inline bool value_iterator::is_at_start() const noexcept { + return position() == start_position(); } -#endif -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::array value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::array value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } +simdjson_inline bool value_iterator::is_at_key() const noexcept { + // Keys are at the same depth as the object. + // Note here that we could be safer and check that we are within an object, + // but we do not. + return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; } -#endif -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::document& value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::document_reference& value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::document& value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } +simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { + // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). + auto delta = position() - start_position(); + return delta == 1 || delta == 2; } -#endif -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::object value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::object value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } +inline void value_iterator::assert_at_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); } -#endif -}}} // namespace simdjson::fallback::ondemand - -#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H -/* end file simdjson/generic/ondemand/serialization-inl.h for fallback */ -/* including simdjson/generic/ondemand/token_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/token_iterator-inl.h for fallback */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { -namespace fallback { -namespace ondemand { -simdjson_inline token_iterator::token_iterator( - const uint8_t *_buf, - token_position position -) noexcept : buf{_buf}, _position{position} -{ +inline void value_iterator::assert_at_container_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); } -simdjson_inline uint32_t token_iterator::current_offset() const noexcept { - return *(_position); +inline void value_iterator::assert_at_next() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); } - -simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { - return &buf[*(_position++)]; +simdjson_inline void value_iterator::move_at_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position); } -simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { - return &buf[*position]; -} -simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { - return *position; +simdjson_inline void value_iterator::move_at_container_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position + 1); } -simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { - return *(position+1) - *position; + +simdjson_inline simdjson_result value_iterator::reset_array() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_array(); } -simdjson_inline uint32_t token_iterator::peek_root_length(token_position position) const noexcept { - return *(position+2) - *(position) > *(position+1) - *(position) ? - *(position+1) - *(position) - : *(position+2) - *(position); +simdjson_inline simdjson_result value_iterator::reset_object() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_object(); } -simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { - return &buf[*(_position+delta)]; + +inline void value_iterator::assert_at_child() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); + SIMDJSON_ASSUME( _depth > 0 ); } -simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { - return *(_position+delta); + +inline void value_iterator::assert_at_root() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth == 1 ); } -simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { - return *(_position+delta+1) - *(_position+delta); + +inline void value_iterator::assert_at_non_root_start() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth > 1 ); } -simdjson_inline token_position token_iterator::position() const noexcept { - return _position; +inline void value_iterator::assert_is_valid() const noexcept { + SIMDJSON_ASSUME( _json_iter != nullptr ); } -simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { - _position = target_position; + +simdjson_inline bool value_iterator::is_valid() const noexcept { + return _json_iter != nullptr; } -simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { - return _position == other._position; +simdjson_inline simdjson_result value_iterator::type() const noexcept { + switch (*peek_start()) { + case '{': + return json_type::object; + case '[': + return json_type::array; + case '"': + return json_type::string; + case 'n': + return json_type::null; + case 't': case 'f': + return json_type::boolean; + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return json_type::number; + default: + return json_type::unknown; + } } -simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { - return _position != other._position; + +simdjson_inline token_position value_iterator::start_position() const noexcept { + return _start_position; } -simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { - return _position > other._position; + +simdjson_inline token_position value_iterator::position() const noexcept { + return _json_iter->position(); } -simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { - return _position >= other._position; + +simdjson_inline token_position value_iterator::end_position() const noexcept { + return _json_iter->end_position(); } -simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { - return _position < other._position; + +simdjson_inline token_position value_iterator::last_position() const noexcept { + return _json_iter->last_position(); } -simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { - return _position <= other._position; + +simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { + return _json_iter->report_error(error, message); } } // namespace ondemand @@ -52623,1116 +60888,2050 @@ simdjson_inline bool token_iterator::operator<=(const token_iterator &other) con namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::token_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::value_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/token_iterator-inl.h for fallback */ -/* including simdjson/generic/ondemand/value_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/value_iterator-inl.h for fallback */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/value_iterator-inl.h for fallback */ + +// JSON builder inline definitions +/* including simdjson/generic/ondemand/json_string_builder-inl.h for fallback: #include "simdjson/generic/ondemand/json_string_builder-inl.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder-inl.h for fallback */ +/** + * This file is part of the builder API. It is temporarily in the ondemand + * directory but we will move it to a builder directory later. + */ +#include +#include +#include +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +/* + * Empirically, we have found that an inlined optimization is important for + * performance. The following macros are not ideal. We should find a better + * way to inline the code. + */ + +#if defined(__SSE2__) || defined(__x86_64__) || defined(__x86_64) || \ + (defined(_M_AMD64) || defined(_M_X64) || \ + (defined(_M_IX86_FP) && _M_IX86_FP == 2)) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#define SIMDJSON_EXPERIMENTAL_HAS_SSE2 1 +#endif +#endif + +#if defined(__aarch64__) || defined(_M_ARM64) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_NEON +#define SIMDJSON_EXPERIMENTAL_HAS_NEON 1 +#endif +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +#include +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#include +#endif + namespace simdjson { namespace fallback { -namespace ondemand { +namespace builder { + +static SIMDJSON_CONSTEXPR_LAMBDA std::array + json_quotable_character = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; -simdjson_inline value_iterator::value_iterator( - json_iterator *json_iter, - depth_t depth, - token_position start_position -) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} -{ -} +/** -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { - SIMDJSON_TRY( start_container('{', "Not an object", "object") ); - return started_object(); -} +A possible SWAR implementation of has_json_escapable_byte. It is not used +because it is slower than the current implementation. It is kept here for +reference (to show that we tried it). -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { - SIMDJSON_TRY( start_container('{', "Not an object", "object") ); - return started_root_object(); +inline bool has_json_escapable_byte(uint64_t x) { + uint64_t is_ascii = 0x8080808080808080ULL & ~x; + uint64_t xor2 = x ^ 0x0202020202020202ULL; + uint64_t lt32_or_eq34 = xor2 - 0x2121212121212121ULL; + uint64_t sub92 = x ^ 0x5C5C5C5C5C5C5C5CULL; + uint64_t eq92 = (sub92 - 0x0101010101010101ULL); + return ((lt32_or_eq34 | eq92) & is_ascii) != 0; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { - assert_at_container_start(); -#if SIMDJSON_DEVELOPMENT_CHECKS - _json_iter->set_start_position(_depth, start_position()); -#endif - if (*_json_iter->peek() == '}') { - logger::log_value(*_json_iter, "empty object"); - _json_iter->return_current_and_advance(); - end_container(); - return false; - } - return true; -} +**/ -simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { - // When in streaming mode, we cannot expect peek_last() to be the last structural element of the - // current document. It only works in the normal mode where we have indexed a single document. - // Note that adding a check for 'streaming' is not expensive since we only have at most - // one root element. - if ( ! _json_iter->streaming() ) { - // The following lines do not fully protect against garbage content within the - // object: e.g., `{"a":2} foo }`. Users concerned with garbage content should - // call `at_end()` on the document instance at the end of the processing to - // ensure that the processing has finished at the end. - // - if (*_json_iter->peek_last() != '}') { - _json_iter->abandon(); - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); - } - // If the last character is } *and* the first gibberish character is also '}' - // then on-demand could accidentally go over. So we need additional checks. - // https://github.com/simdjson/simdjson/issues/1834 - // Checking that the document is balanced requires a full scan which is potentially - // expensive, but it only happens in edge cases where the first padding character is - // a closing bracket. - if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { - _json_iter->abandon(); - // The exact error would require more work. It will typically be an unclosed object. - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); +SIMDJSON_CONSTEXPR_LAMBDA simdjson_inline bool +simple_needs_escaping(std::string_view v) { + for (char c : v) { + // a table lookup is faster than a series of comparisons + if (json_quotable_character[static_cast(c)]) { + return true; } } - return SUCCESS; + return false; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { - auto error = check_root_object(); - if(error) { return error; } - return started_object(); +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); + } + size_t i = 0; + uint8x16_t running = vdupq_n_u8(0); + uint8x16_t v34 = vdupq_n_u8(34); + uint8x16_t v92 = vdupq_n_u8(92); + + for (; i + 15 < view.size(); i += 16) { + uint8x16_t word = vld1q_u8((const uint8_t *)view.data() + i); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + if (i < view.size()) { + uint8x16_t word = + vld1q_u8((const uint8_t *)view.data() + view.length() - 16); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + return vmaxvq_u32(vreinterpretq_u32_u8(running)) != 0; +} +#elif SIMDJSON_EXPERIMENTAL_HAS_SSE2 +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); + } + size_t i = 0; + __m128i running = _mm_setzero_si128(); + for (; i + 15 < view.size(); i += 16) { + + __m128i word = + _mm_loadu_si128(reinterpret_cast(view.data() + i)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + if (i < view.size()) { + __m128i word = _mm_loadu_si128( + reinterpret_cast(view.data() + view.length() - 16)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + return _mm_movemask_epi8(running) != 0; } +#else +simdjson_inline bool fast_needs_escaping(std::string_view view) { + return simple_needs_escaping(view); +} +#endif -simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { -#if SIMDJSON_CHECK_EOF - if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } - // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } -#endif // SIMDJSON_CHECK_EOF - _json_iter->ascend_to(depth()-1); - return SUCCESS; +SIMDJSON_CONSTEXPR_LAMBDA inline size_t +find_next_json_quotable_character(const std::string_view view, + size_t location) noexcept { + + for (auto pos = view.begin() + location; pos != view.end(); ++pos) { + if (json_quotable_character[static_cast(*pos)]) { + return pos - view.begin(); + } + } + return size_t(view.size()); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { - assert_at_next(); +SIMDJSON_CONSTEXPR_LAMBDA static std::string_view control_chars[] = { + "\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", "\\u0006", + "\\u0007", "\\b", "\\t", "\\n", "\\u000b", "\\f", "\\r", + "\\u000e", "\\u000f", "\\u0010", "\\u0011", "\\u0012", "\\u0013", "\\u0014", + "\\u0015", "\\u0016", "\\u0017", "\\u0018", "\\u0019", "\\u001a", "\\u001b", + "\\u001c", "\\u001d", "\\u001e", "\\u001f"}; - // It's illegal to call this unless there are more tokens: anything that ends in } or ] is - // obligated to verify there are more tokens if they are not the top level. - switch (*_json_iter->return_current_and_advance()) { - case '}': - logger::log_end_value(*_json_iter, "object"); - SIMDJSON_TRY( end_container() ); - return false; - case ',': - return true; - default: - return report_error(TAPE_ERROR, "Missing comma between object fields"); +// All Unicode characters may be placed within the quotation marks, except for +// the characters that MUST be escaped: quotation mark, reverse solidus, and the +// control characters (U+0000 through U+001F). There are two-character sequence +// escape representations of some popular characters: +// \", \\, \b, \f, \n, \r, \t. +SIMDJSON_CONSTEXPR_LAMBDA void escape_json_char(char c, char *&out) { + if (c == '"') { + memcpy(out, "\\\"", 2); + out += 2; + } else if (c == '\\') { + memcpy(out, "\\\\", 2); + out += 2; + } else { + std::string_view v = control_chars[uint8_t(c)]; + memcpy(out, v.data(), v.size()); + out += v.size(); + } +} + +inline size_t write_string_escaped(const std::string_view input, char *out) { + size_t mysize = input.size(); + if (!fast_needs_escaping(input)) { // fast path! + memcpy(out, input.data(), input.size()); + return input.size(); + } + const char *const initout = out; + size_t location = find_next_json_quotable_character(input, 0); + memcpy(out, input.data(), location); + out += location; + escape_json_char(input[location], out); + location += 1; + while (location < mysize) { + size_t newlocation = find_next_json_quotable_character(input, location); + memcpy(out, input.data() + location, newlocation - location); + out += newlocation - location; + location = newlocation; + if (location == mysize) { + break; + } + escape_json_char(input[location], out); + location += 1; } + return out - initout; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { - error_code error; - bool has_value; - // - // Initially, the object can be in one of a few different places: - // - // 1. The start of the object, at the first field: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2, index 1) - // ``` - if (at_first_field()) { - has_value = true; +simdjson_inline string_builder::string_builder(size_t initial_capacity) + : buffer(new(std::nothrow) char[initial_capacity]), position(0), + capacity(buffer.get() != nullptr ? initial_capacity : 0), + is_valid(buffer.get() != nullptr) {} - // - // 2. When a previous search did not yield a value or the object is empty: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 0) - // { } - // ^ (depth 0, index 2) - // ``` - // - } else if (!is_open()) { -#if SIMDJSON_DEVELOPMENT_CHECKS - // If we're past the end of the object, we're being iterated out of order. - // Note: this is not perfect detection. It's possible the user is inside some other object; if so, - // this object iterator will blithely scan that object for fields. - if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } -#endif +simdjson_inline bool string_builder::capacity_check(size_t upcoming_bytes) { + // We use the convention that when is_valid is false, then the capacity and + // the position are 0. + // Most of the time, this function will return true. + if (simdjson_likely(upcoming_bytes <= capacity - position)) { + return true; + } + // check for overflow, most of the time there is no overflow + if (simdjson_likely(position + upcoming_bytes < position)) { return false; + } + // We will rarely get here. + grow_buffer((std::max)(capacity * 2, position + upcoming_bytes)); + // If the buffer allocation failed, we set is_valid to false. + return is_valid; +} - // 3. When a previous search found a field or an iterator yielded a value: - // - // ``` - // // When a field was not fully consumed (or not even touched at all) - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2) - // // When a field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // // When the last field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // ``` - // +simdjson_inline void string_builder::grow_buffer(size_t desired_capacity) { + if (!is_valid) { + return; + } + std::unique_ptr new_buffer(new (std::nothrow) char[desired_capacity]); + if (new_buffer.get() == nullptr) { + set_valid(false); + return; + } + std::memcpy(new_buffer.get(), buffer.get(), position); + buffer.swap(new_buffer); + capacity = desired_capacity; +} + +simdjson_inline void string_builder::set_valid(bool valid) noexcept { + if (!valid) { + is_valid = false; + capacity = 0; + position = 0; + buffer.reset(); } else { - if ((error = skip_child() )) { abandon(); return error; } - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } -#if SIMDJSON_DEVELOPMENT_CHECKS - if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } -#endif + is_valid = true; } - while (has_value) { - // Get the key and colon, stopping at the value. - raw_json_string actual_key; - // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes - // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. - // field_key() advances the pointer and checks that '"' is found (corresponding to a key). - // The depth is left unchanged by field_key(). - if ((error = field_key().get(actual_key) )) { abandon(); return error; }; - // field_value() will advance and check that we find a ':' separating the - // key and the value. It will also increment the depth by one. - if ((error = field_value() )) { abandon(); return error; } - // If it matches, stop and return - // We could do it this way if we wanted to allow arbitrary - // key content (including escaped quotes). - //if (actual_key.unsafe_is_equal(max_key_length, key)) { - // Instead we do the following which may trigger buffer overruns if the - // user provides an adversarial key (containing a well placed unescaped quote - // character and being longer than the number of bytes remaining in the JSON - // input). - if (actual_key.unsafe_is_equal(key)) { - logger::log_event(*this, "match", key, -2); - // If we return here, then we return while pointing at the ':' that we just checked. - return true; - } +} - // No match: skip the value and see if , or } is next - logger::log_event(*this, "no match", key, -2); - // The call to skip_child is meant to skip over the value corresponding to the key. - // After skip_child(), we are right before the next comma (',') or the final brace ('}'). - SIMDJSON_TRY( skip_child() ); // Skip the value entirely - // The has_next_field() advances the pointer and check that either ',' or '}' is found. - // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, - // then we are in error and we abort. - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +simdjson_inline size_t string_builder::size() const noexcept { + return position; +} + +simdjson_inline void string_builder::append(char c) noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = c; } +} - // If the loop ended, we're out of fields to look at. - return false; +simdjson_inline void string_builder::append_null() noexcept { + constexpr char null_literal[] = "null"; + constexpr size_t null_len = sizeof(null_literal) - 1; + if (capacity_check(null_len)) { + std::memcpy(buffer.get() + position, null_literal, null_len); + position += null_len; + } } -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { - /** - * When find_field_unordered_raw is called, we can either be pointing at the - * first key, pointing outside (at the closing brace) or if a key was matched - * we can be either pointing right afterthe ':' right before the value (that we need skip), - * or we may have consumed the value and we might be at a comma or at the - * final brace (ready for a call to has_next_field()). - */ - error_code error; - bool has_value; +simdjson_inline void string_builder::clear() noexcept { + position = 0; + // if it was invalid, we should try to repair it + if (!is_valid) { + capacity = 0; + buffer.reset(); + is_valid = true; + } +} - // First, we scan from that point to the end. - // If we don't find a match, we may loop back around, and scan from the beginning to that point. - token_position search_start = _json_iter->position(); +namespace internal { - // We want to know whether we need to go back to the beginning. - bool at_first = at_first_field(); - /////////////// - // Initially, the object can be in one of a few different places: - // - // 1. At the first key: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2, index 1) - // ``` - // - if (at_first) { - has_value = true; +template ::value>::type> +simdjson_really_inline int int_log2(number_type x) { + return 63 - leading_zeroes(uint64_t(x) | 1); +} + +simdjson_really_inline int fast_digit_count_32(uint32_t x) { + static uint64_t table[] = { + 4294967296, 8589934582, 8589934582, 8589934582, 12884901788, + 12884901788, 12884901788, 17179868184, 17179868184, 17179868184, + 21474826480, 21474826480, 21474826480, 21474826480, 25769703776, + 25769703776, 25769703776, 30063771072, 30063771072, 30063771072, + 34349738368, 34349738368, 34349738368, 34349738368, 38554705664, + 38554705664, 38554705664, 41949672960, 41949672960, 41949672960, + 42949672960, 42949672960}; + return uint32_t((x + table[int_log2(x)]) >> 32); +} + +simdjson_really_inline int fast_digit_count_64(uint64_t x) { + static uint64_t table[] = {9, + 99, + 999, + 9999, + 99999, + 999999, + 9999999, + 99999999, + 999999999, + 9999999999, + 99999999999, + 999999999999, + 9999999999999, + 99999999999999, + 999999999999999ULL, + 9999999999999999ULL, + 99999999999999999ULL, + 999999999999999999ULL, + 9999999999999999999ULL}; + int y = (19 * int_log2(x) >> 6); + y += x > table[y]; + return y + 1; +} + +template ::value>::type> +simdjson_really_inline size_t digit_count(number_type v) noexcept { + static_assert(sizeof(number_type) == 8 || sizeof(number_type) == 4 || + sizeof(number_type) == 2 || sizeof(number_type) == 1, + "We only support 8-bit, 16-bit, 32-bit and 64-bit numbers"); + SIMDJSON_IF_CONSTEXPR(sizeof(number_type) <= 4) { + return fast_digit_count_32(static_cast(v)); + } + else { + return fast_digit_count_64(static_cast(v)); + } +} +static const char decimal_table[200] = { + 0x30, 0x30, 0x30, 0x31, 0x30, 0x32, 0x30, 0x33, 0x30, 0x34, 0x30, 0x35, + 0x30, 0x36, 0x30, 0x37, 0x30, 0x38, 0x30, 0x39, 0x31, 0x30, 0x31, 0x31, + 0x31, 0x32, 0x31, 0x33, 0x31, 0x34, 0x31, 0x35, 0x31, 0x36, 0x31, 0x37, + 0x31, 0x38, 0x31, 0x39, 0x32, 0x30, 0x32, 0x31, 0x32, 0x32, 0x32, 0x33, + 0x32, 0x34, 0x32, 0x35, 0x32, 0x36, 0x32, 0x37, 0x32, 0x38, 0x32, 0x39, + 0x33, 0x30, 0x33, 0x31, 0x33, 0x32, 0x33, 0x33, 0x33, 0x34, 0x33, 0x35, + 0x33, 0x36, 0x33, 0x37, 0x33, 0x38, 0x33, 0x39, 0x34, 0x30, 0x34, 0x31, + 0x34, 0x32, 0x34, 0x33, 0x34, 0x34, 0x34, 0x35, 0x34, 0x36, 0x34, 0x37, + 0x34, 0x38, 0x34, 0x39, 0x35, 0x30, 0x35, 0x31, 0x35, 0x32, 0x35, 0x33, + 0x35, 0x34, 0x35, 0x35, 0x35, 0x36, 0x35, 0x37, 0x35, 0x38, 0x35, 0x39, + 0x36, 0x30, 0x36, 0x31, 0x36, 0x32, 0x36, 0x33, 0x36, 0x34, 0x36, 0x35, + 0x36, 0x36, 0x36, 0x37, 0x36, 0x38, 0x36, 0x39, 0x37, 0x30, 0x37, 0x31, + 0x37, 0x32, 0x37, 0x33, 0x37, 0x34, 0x37, 0x35, 0x37, 0x36, 0x37, 0x37, + 0x37, 0x38, 0x37, 0x39, 0x38, 0x30, 0x38, 0x31, 0x38, 0x32, 0x38, 0x33, + 0x38, 0x34, 0x38, 0x35, 0x38, 0x36, 0x38, 0x37, 0x38, 0x38, 0x38, 0x39, + 0x39, 0x30, 0x39, 0x31, 0x39, 0x32, 0x39, 0x33, 0x39, 0x34, 0x39, 0x35, + 0x39, 0x36, 0x39, 0x37, 0x39, 0x38, 0x39, 0x39, +}; +} // namespace internal - // 2. When a previous search did not yield a value or the object is empty: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 0) - // { } - // ^ (depth 0, index 2) - // ``` - // - } else if (!is_open()) { +template +simdjson_inline void string_builder::append(number_type v) noexcept { + static_assert(std::is_same::value || + std::is_integral::value || + std::is_floating_point::value, + "Unsupported number type"); + // If C++17 is available, we can 'if constexpr' here. + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + if (v) { + constexpr char true_literal[] = "true"; + constexpr size_t true_len = sizeof(true_literal) - 1; + if (capacity_check(true_len)) { + std::memcpy(buffer.get() + position, true_literal, true_len); + position += true_len; + } + } else { + constexpr char false_literal[] = "false"; + constexpr size_t false_len = sizeof(false_literal) - 1; + if (capacity_check(false_len)) { + std::memcpy(buffer.get() + position, false_literal, false_len); + position += false_len; + } + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_unsigned::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + unsigned_type pv = static_cast(v); + size_t dc = internal::digit_count(pv); + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100) * 2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_integral::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + bool negative = v < 0; + unsigned_type pv = static_cast(v); + if (negative) { + pv = 0 - pv; // the 0 is for Microsoft + } + size_t dc = internal::digit_count(pv); + // by always writing the minus sign, we avoid the branch. + buffer.get()[position] = '-'; + position += negative ? 1 : 0; + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100) * 2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_floating_point::value) { + constexpr size_t max_number_size = 24; + if (capacity_check(max_number_size)) { + // We could specialize for float. + char *end = simdjson::internal::to_chars(buffer.get() + position, nullptr, + double(v)); + position = end - buffer.get(); + } + } +} -#if SIMDJSON_DEVELOPMENT_CHECKS - // If we're past the end of the object, we're being iterated out of order. - // Note: this is not perfect detection. It's possible the user is inside some other object; if so, - // this object iterator will blithely scan that object for fields. - if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } -#endif - SIMDJSON_TRY(reset_object().get(has_value)); - at_first = true; - // 3. When a previous search found a field or an iterator yielded a value: - // - // ``` - // // When a field was not fully consumed (or not even touched at all) - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2) - // // When a field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // // When the last field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // ``` - // - } else { - // If someone queried a key but they not did access the value, then we are left pointing - // at the ':' and we need to move forward through the value... If the value was - // processed then skip_child() does not move the iterator (but may adjust the depth). - if ((error = skip_child() )) { abandon(); return error; } - search_start = _json_iter->position(); - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } -#if SIMDJSON_DEVELOPMENT_CHECKS - if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } -#endif +simdjson_inline void +string_builder::escape_and_append(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(6 * input.size())) { + position += write_string_escaped(input, buffer.get() + position); } +} - // After initial processing, we will be in one of two states: - // - // ``` - // // At the beginning of a field - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // // At the end of the object - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 0) - // ``` - // - // Next, we find a match starting from the current position. - while (has_value) { - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field +simdjson_inline void +string_builder::escape_and_append_with_quotes(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * input.size())) { + buffer.get()[position++] = '"'; + position += write_string_escaped(input, buffer.get() + position); + buffer.get()[position++] = '"'; + } +} - // Get the key and colon, stopping at the value. - raw_json_string actual_key; - // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes - // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. - // field_key() advances the pointer and checks that '"' is found (corresponding to a key). - // The depth is left unchanged by field_key(). - if ((error = field_key().get(actual_key) )) { abandon(); return error; }; - // field_value() will advance and check that we find a ':' separating the - // key and the value. It will also increment the depth by one. - if ((error = field_value() )) { abandon(); return error; } +simdjson_inline void +string_builder::escape_and_append_with_quotes(char input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * 1)) { + buffer.get()[position++] = '"'; + std::string_view cinput(&input, 1); + position += write_string_escaped(cinput, buffer.get() + position); + buffer.get()[position++] = '"'; + } +} - // If it matches, stop and return - // We could do it this way if we wanted to allow arbitrary - // key content (including escaped quotes). - // if (actual_key.unsafe_is_equal(max_key_length, key)) { - // Instead we do the following which may trigger buffer overruns if the - // user provides an adversarial key (containing a well placed unescaped quote - // character and being longer than the number of bytes remaining in the JSON - // input). - if (actual_key.unsafe_is_equal(key)) { - logger::log_event(*this, "match", key, -2); - // If we return here, then we return while pointing at the ':' that we just checked. - return true; - } +simdjson_inline void +string_builder::escape_and_append_with_quotes(const char *input) noexcept { + std::string_view cinput(input); + escape_and_append_with_quotes(cinput); +} +#if SIMDJSON_SUPPORTS_CONCEPTS +template +simdjson_inline void string_builder::escape_and_append_with_quotes() noexcept { + escape_and_append_with_quotes(constevalutil::string_constant::value); +} +#endif - // No match: skip the value and see if , or } is next - logger::log_event(*this, "no match", key, -2); - // The call to skip_child is meant to skip over the value corresponding to the key. - // After skip_child(), we are right before the next comma (',') or the final brace ('}'). - SIMDJSON_TRY( skip_child() ); - // The has_next_field() advances the pointer and check that either ',' or '}' is found. - // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, - // then we are in error and we abort. - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +simdjson_inline void string_builder::append_raw(const char *c) noexcept { + size_t len = std::strlen(c); + append_raw(c, len); +} + +simdjson_inline void +string_builder::append_raw(std::string_view input) noexcept { + if (capacity_check(input.size())) { + std::memcpy(buffer.get() + position, input.data(), input.size()); + position += input.size(); } - // Performance note: it maybe wasteful to rewind to the beginning when there might be - // no other query following. Indeed, it would require reskipping the whole object. - // Instead, you can just stay where you are. If there is a new query, there is always time - // to rewind. - if(at_first) { return false; } +} - // If we reach the end without finding a match, search the rest of the fields starting at the - // beginning of the object. - // (We have already run through the object before, so we've already validated its structure. We - // don't check errors in this bit.) - SIMDJSON_TRY(reset_object().get(has_value)); - while (true) { - SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field +simdjson_inline void string_builder::append_raw(const char *str, + size_t len) noexcept { + if (capacity_check(len)) { + std::memcpy(buffer.get() + position, str, len); + position += len; + } +} +#if SIMDJSON_SUPPORTS_CONCEPTS +// Support for optional types (std::optional, etc.) +template + requires(!require_custom_serialization) +simdjson_inline void string_builder::append(const T &opt) { + if (opt) { + append(*opt); + } else { + append_null(); + } +} + +template + requires(require_custom_serialization) +simdjson_inline void string_builder::append(T &&val) { + serialize(*this, std::forward(val)); +} - // Get the key and colon, stopping at the value. - raw_json_string actual_key; - // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes - // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. - // field_key() advances the pointer and checks that '"' is found (corresponding to a key). - // The depth is left unchanged by field_key(). - error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); - // field_value() will advance and check that we find a ':' separating the - // key and the value. It will also increment the depth by one. - error = field_value(); SIMDJSON_ASSUME(!error); +template + requires(std::is_convertible::value || + std::is_same::value) +simdjson_inline void string_builder::append(const T &value) { + escape_and_append_with_quotes(value); +} +#endif - // If it matches, stop and return - // We could do it this way if we wanted to allow arbitrary - // key content (including escaped quotes). - // if (actual_key.unsafe_is_equal(max_key_length, key)) { - // Instead we do the following which may trigger buffer overruns if the - // user provides an adversarial key (containing a well placed unescaped quote - // character and being longer than the number of bytes remaining in the JSON - // input). - if (actual_key.unsafe_is_equal(key)) { - logger::log_event(*this, "match", key, -2); - // If we return here, then we return while pointing at the ':' that we just checked. - return true; +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS +// Support for range-based appending (std::ranges::view, etc.) +template + requires(!std::is_convertible::value && !require_custom_serialization) +simdjson_inline void string_builder::append(const R &range) noexcept { + auto it = std::ranges::begin(range); + auto end = std::ranges::end(range); + if constexpr (concepts::is_pair>) { + start_object(); + + if (it == end) { + end_object(); + return; // Handle empty range } + // Append first item without leading comma + append_key_value(it->first, it->second); + ++it; - // No match: skip the value and see if , or } is next - logger::log_event(*this, "no match", key, -2); - // The call to skip_child is meant to skip over the value corresponding to the key. - // After skip_child(), we are right before the next comma (',') or the final brace ('}'). - SIMDJSON_TRY( skip_child() ); - // If we reached the end of the key-value pair we started from, then we know - // that the key is not there so we return false. We are either right before - // the next comma or the final brace. - if(_json_iter->position() == search_start) { return false; } - // The has_next_field() advances the pointer and check that either ',' or '}' is found. - // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, - // then we are in error and we abort. - error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); - // If we make the mistake of exiting here, then we could be left pointing at a key - // in the middle of an object. That's not an allowable state. + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append_key_value(it->first, it->second); + } + end_object(); + } else { + start_array(); + if (it == end) { + end_array(); + return; // Handle empty range + } + + // Append first item without leading comma + append(*it); + ++it; + + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append(*it); + } + end_array(); } - // If the loop ended, we're out of fields to look at. The program should - // never reach this point. - return false; } -SIMDJSON_POP_DISABLE_WARNINGS -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { - assert_at_next(); +#endif - const uint8_t *key = _json_iter->return_current_and_advance(); - if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } - return raw_json_string(key); +#if SIMDJSON_EXCEPTIONS +simdjson_inline string_builder::operator std::string() const noexcept(false) { + return std::string(operator std::string_view()); } -simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { - assert_at_next(); +simdjson_inline string_builder::operator std::string_view() const + noexcept(false) simdjson_lifetime_bound { + return view(); +} +#endif - if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } - _json_iter->descend_to(depth()+1); - return SUCCESS; +simdjson_inline simdjson_result +string_builder::view() const noexcept { + if (!is_valid) { + return simdjson::OUT_OF_CAPACITY; + } + return std::string_view(buffer.get(), position); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { - SIMDJSON_TRY( start_container('[', "Not an array", "array") ); - return started_array(); +simdjson_inline simdjson_result string_builder::c_str() noexcept { + if (capacity_check(1)) { + buffer.get()[position] = '\0'; + return buffer.get(); + } + return simdjson::OUT_OF_CAPACITY; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { - SIMDJSON_TRY( start_container('[', "Not an array", "array") ); - return started_root_array(); +simdjson_inline bool string_builder::validate_unicode() const noexcept { + return simdjson::validate_utf8(buffer.get(), position); } -inline std::string value_iterator::to_string() const noexcept { - auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); - if(_json_iter != nullptr) { answer += _json_iter->to_string(); } - answer += std::string(" ]"); - return answer; +simdjson_inline void string_builder::start_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '{'; + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { - assert_at_container_start(); - if (*_json_iter->peek() == ']') { - logger::log_value(*_json_iter, "empty array"); - _json_iter->return_current_and_advance(); - SIMDJSON_TRY( end_container() ); - return false; +simdjson_inline void string_builder::end_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '}'; } - _json_iter->descend_to(depth()+1); -#if SIMDJSON_DEVELOPMENT_CHECKS - _json_iter->set_start_position(_depth, start_position()); -#endif - return true; } -simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { - // When in streaming mode, we cannot expect peek_last() to be the last structural element of the - // current document. It only works in the normal mode where we have indexed a single document. - // Note that adding a check for 'streaming' is not expensive since we only have at most - // one root element. - if ( ! _json_iter->streaming() ) { - // The following lines do not fully protect against garbage content within the - // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should - // also call `at_end()` on the document instance at the end of the processing to - // ensure that the processing has finished at the end. - // - if (*_json_iter->peek_last() != ']') { - _json_iter->abandon(); - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); - } - // If the last character is ] *and* the first gibberish character is also ']' - // then on-demand could accidentally go over. So we need additional checks. - // https://github.com/simdjson/simdjson/issues/1834 - // Checking that the document is balanced requires a full scan which is potentially - // expensive, but it only happens in edge cases where the first padding character is - // a closing bracket. - if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { - _json_iter->abandon(); - // The exact error would require more work. It will typically be an unclosed array. - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); - } +simdjson_inline void string_builder::start_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '['; } - return SUCCESS; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { - auto error = check_root_array(); - if (error) { return error; } - return started_array(); +simdjson_inline void string_builder::end_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ']'; + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { - assert_at_next(); +simdjson_inline void string_builder::append_comma() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ','; + } +} - logger::log_event(*this, "has_next_element"); - switch (*_json_iter->return_current_and_advance()) { - case ']': - logger::log_end_value(*_json_iter, "array"); - SIMDJSON_TRY( end_container() ); - return false; - case ',': - _json_iter->descend_to(depth()+1); - return true; - default: - return report_error(TAPE_ERROR, "Missing comma between array elements"); +simdjson_inline void string_builder::append_colon() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ':'; } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { - auto not_true = atomparsing::str4ncmp(json, "true"); - auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); - bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 5 : 4]); - if (error) { return incorrect_type_error("Not a boolean"); } - return simdjson_result(!not_true); +template +simdjson_inline void +string_builder::append_key_value(key_type key, value_type value) noexcept { + static_assert(std::is_same::value || + std::is_convertible::value, + "Unsupported key type"); + escape_and_append_with_quotes(key); + append_colon(); + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + append_null(); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR( + std::is_convertible::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else { + append(value); + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { - bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); - // if we start with 'n', we must be a null - if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } - return is_null_string; + +#if SIMDJSON_SUPPORTS_CONCEPTS +template +simdjson_inline void +string_builder::append_key_value(value_type value) noexcept { + escape_and_append_with_quotes(); + append_colon(); + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + append_null(); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR( + std::is_convertible::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else { + append(value); + } } +#endif -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { - return get_raw_json_string().unescape(json_iter(), allow_replacement); +} // namespace builder +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_STRING_BUILDER_INL_H +/* end file simdjson/generic/ondemand/json_string_builder-inl.h for fallback */ +/* including simdjson/generic/ondemand/json_builder.h for fallback: #include "simdjson/generic/ondemand/json_builder.h" */ +/* begin file simdjson/generic/ondemand/json_builder.h for fallback */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/concepts.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#if SIMDJSON_STATIC_REFLECTION + +#include +#include +#include +#include +#include +#include +#include +#include +// #include // for std::define_static_string - header not available yet + +namespace simdjson { +namespace fallback { +namespace builder { + +template + requires(concepts::container_but_not_string && !require_custom_serialization) +constexpr void atom(string_builder &b, const T &t) { + auto it = t.begin(); + auto end = t.end(); + if (it == end) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, *it); + ++it; + for (; it != end; ++it) { + b.append(','); + atom(b, *it); + } + b.append(']'); } -template -simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept { - std::string_view content; - auto err = get_string(allow_replacement).get(content); - if (err) { return err; } - receiver = content; - return SUCCESS; + +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +constexpr void atom(string_builder &b, const T &t) { + b.escape_and_append_with_quotes(t); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { - return get_raw_json_string().unescape_wobbly(json_iter()); + +template + requires(!require_custom_serialization) +constexpr void atom(string_builder &b, const T &m) { + if (m.empty()) { + b.append_raw("{}"); + return; + } + b.append('{'); + bool first = true; + for (const auto& [key, value] : m) { + if (!first) { + b.append(','); + } + first = false; + // Keys must be convertible to string_view per the concept + b.escape_and_append_with_quotes(key); + b.append(':'); + atom(b, value); + } + b.append('}'); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { - auto json = peek_scalar("string"); - if (*json != '"') { return incorrect_type_error("Not a string"); } - advance_scalar("string"); - return raw_json_string(json+1); + + +template::value && !std::is_same_v>::type> +constexpr void atom(string_builder &b, const number_type t) { + b.append(t); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { - auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } - return result; + +template + requires(std::is_class_v && !concepts::container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && !require_custom_serialization) +constexpr void atom(string_builder &b, const T &t) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, t.[:dm:]); + i++; + }; + b.append('}'); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { - auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } - return result; + +// Support for optional types (std::optional, etc.) +template + requires(!require_custom_serialization) +constexpr void atom(string_builder &b, const T &opt) { + if (opt) { + atom(b, opt.value()); + } else { + b.append_raw("null"); + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { - auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } - return result; + +// Support for smart pointers (std::unique_ptr, std::shared_ptr, etc.) +template + requires(!require_custom_serialization) +constexpr void atom(string_builder &b, const T &ptr) { + if (ptr) { + atom(b, *ptr); + } else { + b.append_raw("null"); + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { - auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } - return result; + +// Support for enums - serialize as string representation using expand approach from P2996R12 +template + requires(std::is_enum_v && !require_custom_serialization) +void atom(string_builder &b, const T &e) { +#if SIMDJSON_STATIC_REFLECTION + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + constexpr auto enum_str = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(enum_val))); + if (e == [:enum_val:]) { + b.append_raw(enum_str); + return; + } + }; + // Fallback to integer if enum value not found + atom(b, static_cast>(e)); +#else + // Fallback: serialize as integer if reflection not available + atom(b, static_cast>(e)); +#endif } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { - auto result = numberparsing::parse_double(peek_non_root_scalar("double")); - if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } - return result; + +// Support for appendable containers that don't have operator[] (sets, etc.) +template + requires(!concepts::container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v && !require_custom_serialization) +constexpr void atom(string_builder &b, const T &container) { + if (container.empty()) { + b.append_raw("[]"); + return; + } + b.append('['); + bool first = true; + for (const auto& item : container) { + if (!first) { + b.append(','); + } + first = false; + atom(b, item); + } + b.append(']'); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { - auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); - if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } - return result; + +// append functions that delegate to atom functions for primitive types +template + requires(std::is_arithmetic_v && !std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { - auto result = parse_bool(peek_non_root_scalar("bool")); - if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } - return result; + +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_inline simdjson_result value_iterator::is_null() noexcept { - bool is_null_value; - SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); - if(is_null_value) { advance_non_root_scalar("null"); } - return is_null_value; + +template + requires(!require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_inline bool value_iterator::is_negative() noexcept { - return numberparsing::is_negative(peek_non_root_scalar("numbersign")); + +template + requires(!require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_inline bool value_iterator::is_root_negative() noexcept { - return numberparsing::is_negative(peek_root_scalar("numbersign")); + +template + requires(!concepts::container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v && !require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_inline simdjson_result value_iterator::is_integer() noexcept { - return numberparsing::is_integer(peek_non_root_scalar("integer")); + +template + requires(!require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +// works for struct +template + requires(std::is_class_v && !concepts::container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && !require_custom_serialization) +void append(string_builder &b, const Z &z) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^Z, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, z.[:dm:]); + i++; + }; + b.append('}'); } -simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { - return numberparsing::get_number_type(peek_non_root_scalar("integer")); + +// works for container that have begin() and end() iterators +template + requires(concepts::container_but_not_string && !require_custom_serialization) +void append(string_builder &b, const Z &z) { + auto it = z.begin(); + auto end = z.end(); + if (it == end) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, *it); + ++it; + for (; it != end; ++it) { + b.append(','); + atom(b, *it); + } + b.append(']'); } -simdjson_inline simdjson_result value_iterator::get_number() noexcept { - number num; - error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); - if(error) { return error; } - return num; + +template + requires (require_custom_serialization) +void append(string_builder &b, const Z &z) { + b.append(z); } -simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("is_root_integer"); - uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - return false; // if there are more than 20 characters, it cannot be represented as an integer. - } - auto answer = numberparsing::is_integer(tmpbuf); - // If the parsing was a success, we must still check that it is - // a single scalar. Note that we parse first because of cases like '[]' where - // getting TRAILING_CONTENT is wrong. - if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } - return answer; + +template +simdjson_warn_unused simdjson_result to_json_string(const Z &z, size_t initial_capacity = string_builder::DEFAULT_INITIAL_CAPACITY) { + string_builder b(initial_capacity); + append(b, z); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); } -simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("number"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - uint8_t tmpbuf[1074+8+1+1]; - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - if(numberparsing::check_if_integer(json, max_len)) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); - return number_type::big_integer; - } - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); - return NUMBER_ERROR; - } - auto answer = numberparsing::get_number_type(tmpbuf); - if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - return answer; +template +simdjson_warn_unused simdjson_error to_json(const Z &z, std::string &s, size_t initial_capacity = string_builder::DEFAULT_INITIAL_CAPACITY) { + string_builder b(initial_capacity); + append(b, z); + std::string_view view; + if(auto e = b.view().get(view); e) { return e; } + s.assign(view); + return SUCCESS; } -simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("number"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - // NOTE: the current approach doesn't work for very big integer numbers containing more than 1074 digits. - uint8_t tmpbuf[1074+8+1+1]; - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - if(numberparsing::check_if_integer(json, max_len)) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); - return BIGINT_ERROR; + +template +string_builder& operator<<(string_builder& b, const Z& z) { + append(b, z); + return b; +} + +// extract_from: Serialize only specific fields from a struct to JSON +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +void extract_from(string_builder &b, const T &obj) { + // Helper to check if a field name matches any of the requested fields + auto should_extract = [](std::string_view field_name) constexpr -> bool { + return ((FieldNames.view() == field_name) || ...); + }; + + b.append('{'); + bool first = true; + + // Iterate through all members of T using reflection + template for (constexpr auto mem : std::define_static_array( + std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + + if constexpr (std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + + // Only serialize this field if it's in our list of requested fields + if constexpr (should_extract(key)) { + if (!first) { + b.append(','); + } + first = false; + + // Serialize the key + constexpr auto quoted_key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(mem))); + b.append_raw(quoted_key); + b.append(':'); + + // Serialize the value + atom(b, obj.[:mem:]); + } } - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); - return NUMBER_ERROR; - } - number num; - error_code error = numberparsing::parse_number(tmpbuf, num); - if(error) { return error; } - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("number"); - return num; + }; + + b.append('}'); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { - return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); + +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_result extract_from(const T &obj, size_t initial_capacity = string_builder::DEFAULT_INITIAL_CAPACITY) { + string_builder b(initial_capacity); + extract_from(b, obj); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); } -template -simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept { - std::string_view content; - auto err = get_root_string(check_trailing, allow_replacement).get(content); - if (err) { return err; } - receiver = content; + +} // namespace builder +} // namespace fallback +// Alias the function template to 'to' in the global namespace +template +simdjson_warn_unused simdjson_result to_json(const Z &z, size_t initial_capacity = fallback::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + fallback::builder::string_builder b(initial_capacity); + fallback::builder::append(b, z); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); +} +template +simdjson_warn_unused simdjson_error to_json(const Z &z, std::string &s, size_t initial_capacity = fallback::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + fallback::builder::string_builder b(initial_capacity); + fallback::builder::append(b, z); + std::string_view view; + if(auto e = b.view().get(view); e) { return e; } + s.assign(view); return SUCCESS; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { - return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); +// Global namespace function for extract_from +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_result extract_from(const T &obj, size_t initial_capacity = fallback::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + fallback::builder::string_builder b(initial_capacity); + fallback::builder::extract_from(b, obj); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { - auto json = peek_scalar("string"); - if (*json != '"') { return incorrect_type_error("Not a string"); } - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_scalar("string"); - return raw_json_string(json+1); + +} // namespace simdjson + +#endif // SIMDJSON_STATIC_REFLECTION + +#endif +/* end file simdjson/generic/ondemand/json_builder.h for fallback */ + +// JSON path accessor (compile-time) - must be after inline definitions +/* including simdjson/generic/ondemand/compile_time_accessors.h for fallback: #include "simdjson/generic/ondemand/compile_time_accessors.h" */ +/* begin file simdjson/generic/ondemand/compile_time_accessors.h for fallback */ +/** + * Compile-time JSON Path and JSON Pointer accessors using C++26 reflection (P2996) + * + * This file validates JSON paths/pointers against struct definitions at compile time + * and generates optimized accessor code with zero runtime overhead. + * + * ## How It Works + * + * **Compile Time**: Path is parsed, validated against struct, types are checked + * **Runtime**: Direct navigation with no parsing or validation overhead + * + * Example: + * ```cpp + * struct User { std::string name; std::vector emails; }; + * + * std::string email; + * path_accessor::extract_field(doc, email); + * + * // Compile time validates: + * // 1. User has "emails" field + * // 2. "emails" is array-like + * // 3. Element type is std::string + * // 4. static_assert(^^std::string == ^^std::string) + * + * // Runtime just navigates: + * // doc.get_object().find_field("emails").get_array().at(0).get(email) + * ``` + * + * ## Key Reflection APIs + * + * - `^^Type`: Reflect operator, converts type to std::meta::info + * - `std::meta::nonstatic_data_members_of(type)`: Get all fields of a struct + * - `std::meta::identifier_of(member)`: Get field name as string_view + * - `std::meta::type_of(member)`: Get reflected type of a field + * - `std::meta::is_array_type(type)`: Check if C-style array + * - `std::meta::remove_extent(array)`: Extract element type from array + * - `std::meta::members_of(type)`: Get all members including typedefs + * - `std::meta::is_type(member)`: Check if member is a type (vs field) + * + * All operations execute at compile time in consteval contexts. + */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_COMPILE_TIME_ACCESSORS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_COMPILE_TIME_ACCESSORS_H */ +/* amalgamation skipped (editor-only): */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// Arguably, we should just check SIMDJSON_STATIC_REFLECTION since it +// is unlikely that we will have reflection support without concepts support. +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + +#include +#include +#include + +namespace simdjson { +namespace fallback { +namespace ondemand { +/*** + * JSONPath implementation for compile-time access + * RFC 9535 JSONPath: Query Expressions for JSON, https://www.rfc-editor.org/rfc/rfc9535 + */ +namespace json_path { + +// Note: value type must be fully defined before this header is included +// This is ensured by including this in amalgamated.h after value-inl.h + +using ::simdjson::fallback::ondemand::value; + +// Path step types +enum class step_type { + field, // .field_name or ["field_name"] + array_index // [index] +}; + +// Represents a single step in a JSON path +template +struct path_step { + step_type type; + char key[N]; // Field name (empty for array indices) + std::size_t index; // Array index (0 for field access) + + constexpr path_step(step_type t, const char (&k)[N], std::size_t idx = 0) + : type(t), index(idx) { + for (std::size_t i = 0; i < N; ++i) { + key[i] = k[i]; + } + } + + constexpr std::string_view key_view() const { + return {key, N - 1}; + } +}; + +// Helper to create field step +template +consteval auto make_field_step(const char (&name)[N]) { + return path_step(step_type::field, name, 0); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("uint64"); - uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; + +// Helper to create array index step +consteval auto make_index_step(std::size_t idx) { + return path_step<1>(step_type::array_index, "", idx); +} + +// Parse state for compile-time JSON path parsing +struct parse_result { + bool success; + std::size_t pos; + std::string_view error_msg; +}; + +// Compile-time JSON path parser +// Supports subset: .field, ["field"], [index], nested combinations +template +struct json_path_parser { + static constexpr std::string_view path_str = Path.view(); + + // Skip leading $ if present + static consteval std::size_t skip_root() { + if (!path_str.empty() && path_str[0] == '$') { + return 1; + } + return 0; } - auto result = numberparsing::parse_unsigned(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("uint64"); + + // Count the number of steps in the path at compile time + static consteval std::size_t count_steps() { + std::size_t count = 0; + std::size_t i = skip_root(); + + while (i < path_str.size()) { + if (path_str[i] == '.') { + // Field access: .field + ++i; + if (i >= path_str.size()) break; + + // Skip field name + while (i < path_str.size() && path_str[i] != '.' && path_str[i] != '[') { + ++i; + } + ++count; + } else if (path_str[i] == '[') { + // Array or bracket notation + ++i; + if (i >= path_str.size()) break; + + if (path_str[i] == '"' || path_str[i] == '\'') { + // Field access: ["field"] or ['field'] + char quote = path_str[i]; + ++i; + while (i < path_str.size() && path_str[i] != quote) { + ++i; + } + if (i < path_str.size()) ++i; // skip closing quote + if (i < path_str.size() && path_str[i] == ']') ++i; + } else { + // Array index: [0], [123] + while (i < path_str.size() && path_str[i] != ']') { + ++i; + } + if (i < path_str.size()) ++i; // skip ] + } + ++count; + } else { + ++i; + } + } + + return count; + } + + // Parse a field name at compile time + static consteval std::size_t parse_field_name(std::size_t start, char* out, std::size_t max_len) { + std::size_t len = 0; + std::size_t i = start; + + while (i < path_str.size() && path_str[i] != '.' && path_str[i] != '[' && len < max_len - 1) { + out[len++] = path_str[i++]; + } + out[len] = '\0'; + return i; + } + + // Parse an array index at compile time + static consteval std::pair parse_array_index(std::size_t start) { + std::size_t index = 0; + std::size_t i = start; + + while (i < path_str.size() && path_str[i] >= '0' && path_str[i] <= '9') { + index = index * 10 + (path_str[i] - '0'); + ++i; + } + + return {i, index}; + } +}; + +// Compile-time path accessor generator +template +struct path_accessor { + using value = ::simdjson::fallback::ondemand::value; + + static constexpr auto parser = json_path_parser(); + static constexpr std::size_t num_steps = parser.count_steps(); + static constexpr std::string_view path_view = Path.view(); + + // Compile-time accessor generation + // If T is a struct, validates the path at compile time + // If T is void, skips validation + template + static inline simdjson_result access(DocOrValue& doc_or_val) noexcept { + // Validate path at compile time if T is a struct + if constexpr (std::is_class_v) { + constexpr bool path_valid = validate_path(); + static_assert(path_valid, "JSON path does not match struct definition"); + } + + // Parse the path at compile time to build access steps + return access_impl(doc_or_val.get_value()); + } + + // Extract value at path directly into target with compile-time type validation + // Example: std::string name; path_accessor::extract_field(doc, name); + template + static inline error_code extract_field(DocOrValue& doc_or_val, FieldType& target) noexcept { + static_assert(std::is_class_v, "extract_field requires T to be a struct type for validation"); + + // Validate path exists in struct definition + constexpr bool path_valid = validate_path(); + static_assert(path_valid, "JSON path does not match struct definition"); + + // Get the type at the end of the path + constexpr auto final_type = get_final_type(); + + // Verify target type matches the field type + static_assert(final_type == ^^FieldType, "Target type does not match the field type at the path"); + + // All validation done at compile time - just navigate and extract + auto json_value = access_impl(doc_or_val.get_value()); + if (json_value.error()) return json_value.error(); + + return json_value.get(target); + } + +private: + // Get the final type by walking the path through the struct type + template + static consteval std::enable_if_t, std::meta::info> get_final_type() { + auto current_type = ^^T; + std::size_t i = parser.skip_root(); + + while (i < path_view.size()) { + if (path_view[i] == '.') { + // .field syntax + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != '.' && path_view[i] != '[') { + ++i; + } + + std::string_view field_name = path_view.substr(field_start, i - field_start); + + auto members = std::meta::nonstatic_data_members_of( + current_type, std::meta::access_context::unchecked() + ); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + break; + } + } + + } else if (path_view[i] == '[') { + ++i; + if (i >= path_view.size()) break; + + if (path_view[i] == '"' || path_view[i] == '\'') { + // ["field"] syntax + char quote = path_view[i]; + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != quote) { + ++i; + } + + std::string_view field_name = path_view.substr(field_start, i - field_start); + if (i < path_view.size()) ++i; // skip quote + if (i < path_view.size() && path_view[i] == ']') ++i; + + auto members = std::meta::nonstatic_data_members_of( + current_type, std::meta::access_context::unchecked() + ); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + break; + } + } + + } else { + // [index] syntax - extract element type + while (i < path_view.size() && path_view[i] >= '0' && path_view[i] <= '9') { + ++i; + } + if (i < path_view.size() && path_view[i] == ']') ++i; + + current_type = get_element_type_reflected(current_type); + } + } else { + ++i; + } + } + + return current_type; } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("uint64"); - uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; + +private: + // Walk path and extract directly into final field using compile-time reflection + template + static inline error_code extract_with_reflection(simdjson_result current, TargetType& target_ref) noexcept { + if (current.error()) return current.error(); + + // Base case: end of path - extract into target + if constexpr (PathPos >= path_view.size()) { + return current.get(target_ref); + } + // Field access: .field_name + else if constexpr (path_view[PathPos] == '.') { + constexpr auto field_info = parse_next_field(PathPos); + constexpr std::string_view field_name = std::get<0>(field_info); + constexpr std::size_t next_pos = std::get<1>(field_info); + + constexpr auto member_info = find_member_by_name(CurrentType, field_name); + static_assert(member_info != ^^void, "Field not found in struct"); + + constexpr auto member_type = std::meta::type_of(member_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + auto obj = obj_result.value_unsafe(); + auto field_value = obj.find_field_unordered(field_name); + + if constexpr (next_pos >= path_view.size()) { + return field_value.get(target_ref); + } else { + return extract_with_reflection(field_value, target_ref); + } + } + // Bracket notation: [index] or ["field"] + else if constexpr (path_view[PathPos] == '[') { + constexpr auto bracket_info = parse_bracket(PathPos); + constexpr bool is_field = std::get<0>(bracket_info); + constexpr std::size_t next_pos = std::get<2>(bracket_info); + + if constexpr (is_field) { + constexpr std::string_view field_name = std::get<1>(bracket_info); + constexpr auto member_info = find_member_by_name(CurrentType, field_name); + static_assert(member_info != ^^void, "Field not found in struct"); + constexpr auto member_type = std::meta::type_of(member_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + auto obj = obj_result.value_unsafe(); + auto field_value = obj.find_field_unordered(field_name); + + if constexpr (next_pos >= path_view.size()) { + return field_value.get(target_ref); + } else { + return extract_with_reflection(field_value, target_ref); + } + } else { + constexpr std::size_t index = std::get<3>(bracket_info); + constexpr auto elem_type = get_element_type_reflected(CurrentType); + static_assert(elem_type != ^^void, "Could not determine array element type"); + + auto arr_result = current.get_array(); + if (arr_result.error()) return arr_result.error(); + auto arr = arr_result.value_unsafe(); + auto elem_value = arr.at(index); + + if constexpr (next_pos >= path_view.size()) { + return elem_value.get(target_ref); + } else { + return extract_with_reflection(elem_value, target_ref); + } + } + } + // Skip unexpected characters and continue + else { + return extract_with_reflection(current, target_ref); + } } - auto result = numberparsing::parse_unsigned_in_string(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("uint64"); + + // Find member by name in reflected type + static consteval std::meta::info find_member_by_name(std::meta::info type_refl, std::string_view name) { + auto members = std::meta::nonstatic_data_members_of(type_refl, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::identifier_of(mem) == name) { + return mem; + } + } } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("int64"); - uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; + + // Generate compile-time accessor code by walking the path + template + static inline simdjson_result access_impl(simdjson_result current) noexcept { + if (current.error()) return current; + + if constexpr (PathPos >= path_view.size()) { + return current; + } else if constexpr (path_view[PathPos] == '.') { + constexpr auto field_info = parse_next_field(PathPos); + constexpr std::string_view field_name = std::get<0>(field_info); + constexpr std::size_t next_pos = std::get<1>(field_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + + auto obj = obj_result.value_unsafe(); + auto next_value = obj.find_field_unordered(field_name); + + return access_impl(next_value); + + } else if constexpr (path_view[PathPos] == '[') { + constexpr auto bracket_info = parse_bracket(PathPos); + constexpr bool is_field = std::get<0>(bracket_info); + constexpr std::size_t next_pos = std::get<2>(bracket_info); + + if constexpr (is_field) { + constexpr std::string_view field_name = std::get<1>(bracket_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + + auto obj = obj_result.value_unsafe(); + auto next_value = obj.find_field_unordered(field_name); + + return access_impl(next_value); + + } else { + constexpr std::size_t index = std::get<3>(bracket_info); + + auto arr_result = current.get_array(); + if (arr_result.error()) return arr_result.error(); + + auto arr = arr_result.value_unsafe(); + auto next_value = arr.at(index); + + return access_impl(next_value); + } + } else { + return access_impl(current); + } } - auto result = numberparsing::parse_integer(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("int64"); + // Parse next field name + static consteval auto parse_next_field(std::size_t start) { + std::size_t i = start + 1; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != '.' && path_view[i] != '[') { + ++i; + } + std::string_view field_name = path_view.substr(field_start, i - field_start); + return std::make_tuple(field_name, i); } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("int64"); - uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; + + // Parse bracket notation: returns (is_field, field_name, next_pos, index) + static consteval auto parse_bracket(std::size_t start) { + std::size_t i = start + 1; // skip '[' + + if (i < path_view.size() && (path_view[i] == '"' || path_view[i] == '\'')) { + // Field access + char quote = path_view[i]; + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != quote) { + ++i; + } + std::string_view field_name = path_view.substr(field_start, i - field_start); + if (i < path_view.size()) ++i; // skip closing quote + if (i < path_view.size() && path_view[i] == ']') ++i; + + return std::make_tuple(true, field_name, i, std::size_t(0)); + } else { + // Array index + std::size_t index = 0; + while (i < path_view.size() && path_view[i] >= '0' && path_view[i] <= '9') { + index = index * 10 + (path_view[i] - '0'); + ++i; + } + if (i < path_view.size() && path_view[i] == ']') ++i; + + return std::make_tuple(false, std::string_view{}, i, index); + } } - auto result = numberparsing::parse_integer_in_string(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("int64"); +public: + // Check if reflected type is array-like (C-style array or indexable container) + // Uses reflection to test: 1) std::meta::is_array_type() for C arrays + // 2) std::meta::substitute() to test concepts::indexable_container concept + static consteval bool is_array_like_reflected(std::meta::info type_reflection) { + if (std::meta::is_array_type(type_reflection)) { + return true; + } + + if (std::meta::can_substitute(^^concepts::indexable_container_v, {type_reflection})) { + return std::meta::extract(std::meta::substitute(^^concepts::indexable_container_v, {type_reflection})); + } + return false; } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("double"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); - return NUMBER_ERROR; + + // Extract element type from reflected array or container + // For C arrays: uses std::meta::remove_extent() + // For containers: finds value_type member using std::meta::members_of() + static consteval std::meta::info get_element_type_reflected(std::meta::info type_reflection) { + if (std::meta::is_array_type(type_reflection)) { + return std::meta::remove_extent(type_reflection); + } + + auto members = std::meta::members_of(type_reflection, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::is_type(mem)) { + auto name = std::meta::identifier_of(mem); + if (name == "value_type") { + return mem; + } + } + } + return ^^void; } - auto result = numberparsing::parse_double(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("double"); + +private: + // Check if type has member with given name + template + static consteval bool has_member(std::string_view member_name) { + constexpr auto members = std::meta::nonstatic_data_members_of(^^Type, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::identifier_of(mem) == member_name) { + return true; + } + } + return false; } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("double"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); - return NUMBER_ERROR; + // Get type of member by name + template + static consteval auto get_member_type(std::string_view member_name) { + constexpr auto members = std::meta::nonstatic_data_members_of(^^Type, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::identifier_of(mem) == member_name) { + return std::meta::type_of(mem); + } + } + return ^^void; } - auto result = numberparsing::parse_double_in_string(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("double"); + + // Check if non-reflected type is array-like + template + static consteval bool is_container_type() { + using BaseType = std::remove_cvref_t; + if constexpr (requires { typename BaseType::value_type; }) { + return true; + } + if constexpr (std::is_array_v) { + return true; + } + return false; } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("bool"); - // We have a boolean if we have either "true" or "false" and the next character is either - // a structural character or whitespace. We also check that the length is correct: - // "true" and "false" are 4 and 5 characters long, respectively. - bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && - (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); - bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && - (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); - if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("bool"); - return value_true; -} + // Extract element type from non-reflected container + template + using extract_element_type = std::conditional_t< + requires { typename std::remove_cvref_t::value_type; }, + typename std::remove_cvref_t::value_type, + std::conditional_t< + std::is_array_v>, + std::remove_extent_t>, + void + > + >; + + // Validate path matches struct definition + static consteval bool validate_path() { + if constexpr (!std::is_class_v) { + return true; + } -simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("null"); - bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && - (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); - if(result) { // we have something that looks like a null. - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("null"); - } else if (json[0] == 'n') { - return incorrect_type_error("Not a null but starts with n"); - } - return result; -} + auto current_type = ^^T; + std::size_t i = parser.skip_root(); -simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); + while (i < path_view.size()) { + if (path_view[i] == '.') { + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != '.' && path_view[i] != '[') { + ++i; + } - return _json_iter->skip_child(depth()); -} + std::string_view field_name = path_view.substr(field_start, i - field_start); -simdjson_inline value_iterator value_iterator::child() const noexcept { - assert_at_child(); - return { _json_iter, depth()+1, _json_iter->token.position() }; -} + bool found = false; + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); -// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller -// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is -// marked non-inline. -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_inline bool value_iterator::is_open() const noexcept { - return _json_iter->depth() >= depth(); -} -SIMDJSON_POP_DISABLE_WARNINGS + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + found = true; + break; + } + } -simdjson_inline bool value_iterator::at_end() const noexcept { - return _json_iter->at_end(); -} + if (!found) { + return false; + } -simdjson_inline bool value_iterator::at_start() const noexcept { - return _json_iter->token.position() == start_position(); -} + } else if (path_view[i] == '[') { + ++i; + if (i >= path_view.size()) return false; -simdjson_inline bool value_iterator::at_first_field() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - return _json_iter->token.position() == start_position() + 1; -} + if (path_view[i] == '"' || path_view[i] == '\'') { + char quote = path_view[i]; + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != quote) { + ++i; + } -simdjson_inline void value_iterator::abandon() noexcept { - _json_iter->abandon(); -} + std::string_view field_name = path_view.substr(field_start, i - field_start); + if (i < path_view.size()) ++i; + if (i < path_view.size() && path_view[i] == ']') ++i; -simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { - return _depth; -} -simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { - return _json_iter->error; -} -simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { - return _json_iter->string_buf_loc(); -} -simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { - return *_json_iter; -} -simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { - return *_json_iter; -} + bool found = false; + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); -simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { - return _json_iter->peek(start_position()); -} -simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { - return _json_iter->peek_length(start_position()); + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + found = true; + break; + } + } + + if (!found) { + return false; + } + + } else { + while (i < path_view.size() && path_view[i] >= '0' && path_view[i] <= '9') { + ++i; + } + + if (i < path_view.size() && path_view[i] == ']') ++i; + + if (!is_array_like_reflected(current_type)) { + return false; + } + + auto new_type = get_element_type_reflected(current_type); + + if (new_type == ^^void) { + return false; + } + + current_type = new_type; + } + } else { + ++i; + } + } + + return true; + } +}; + +// Compile-time path accessor with validation +template +inline simdjson_result<::simdjson::fallback::ondemand::value> at_path_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = path_accessor; + return accessor::access(doc_or_val); } -simdjson_inline uint32_t value_iterator::peek_root_length() const noexcept { - return _json_iter->peek_root_length(start_position()); + +// Overload without type parameter (no validation) +template +inline simdjson_result<::simdjson::fallback::ondemand::value> at_path_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = path_accessor; + return accessor::access(doc_or_val); } -simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - // If we're not at the position anymore, we don't want to advance the cursor. - if (!is_at_start()) { return peek_start(); } +// ============================================================================ +// JSON Pointer Compile-Time Support (RFC 6901) +// ============================================================================ - // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. - assert_at_start(); - return _json_iter->peek(); -} +// JSON Pointer parser: /field/0/nested (slash-separated) +template +struct json_pointer_parser { + static constexpr std::string_view pointer_str = Pointer.view(); -simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - // If we're not at the position anymore, we don't want to advance the cursor. - if (!is_at_start()) { return; } + // Unescape token: ~0 -> ~, ~1 -> / + static consteval void unescape_token(std::string_view src, char* dest, std::size_t& out_len) { + out_len = 0; + for (std::size_t i = 0; i < src.size(); ++i) { + if (src[i] == '~' && i + 1 < src.size()) { + if (src[i + 1] == '0') { + dest[out_len++] = '~'; + ++i; + } else if (src[i + 1] == '1') { + dest[out_len++] = '/'; + ++i; + } else { + dest[out_len++] = src[i]; + } + } else { + dest[out_len++] = src[i]; + } + } + } - // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. - assert_at_start(); - _json_iter->return_current_and_advance(); - _json_iter->ascend_to(depth()-1); -} + // Check if token is numeric + static consteval bool is_numeric(std::string_view token) { + if (token.empty()) return false; + if (token[0] == '0' && token.size() > 1) return false; + for (char c : token) { + if (c < '0' || c > '9') return false; + } + return true; + } -simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { - logger::log_start_value(*_json_iter, start_position(), depth(), type); - // If we're not at the position anymore, we don't want to advance the cursor. - const uint8_t *json; - if (!is_at_start()) { -#if SIMDJSON_DEVELOPMENT_CHECKS - if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } -#endif - json = peek_start(); - if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } - } else { - assert_at_start(); - /** - * We should be prudent. Let us peek. If it is not the right type, we - * return an error. Only once we have determined that we have the right - * type are we allowed to advance! - */ - json = _json_iter->peek(); - if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } - _json_iter->return_current_and_advance(); + // Parse numeric token to index + static consteval std::size_t parse_index(std::string_view token) { + std::size_t result = 0; + for (char c : token) { + result = result * 10 + (c - '0'); + } + return result; } + // Count tokens in pointer + static consteval std::size_t count_tokens() { + if (pointer_str.empty() || pointer_str == "/") return 0; - return SUCCESS; -} + std::size_t count = 0; + std::size_t pos = pointer_str[0] == '/' ? 1 : 0; + while (pos < pointer_str.size()) { + ++count; + std::size_t next_slash = pointer_str.find('/', pos); + if (next_slash == std::string_view::npos) break; + pos = next_slash + 1; + } -simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return peek_start(); } + return count; + } - assert_at_root(); - return _json_iter->peek(); -} -simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return peek_start(); } + // Get Nth token + static consteval std::string_view get_token(std::size_t token_index) { + std::size_t pos = pointer_str[0] == '/' ? 1 : 0; + std::size_t current_token = 0; - assert_at_non_root_start(); - return _json_iter->peek(); -} + while (current_token < token_index) { + std::size_t next_slash = pointer_str.find('/', pos); + pos = next_slash + 1; + ++current_token; + } -simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return; } + std::size_t token_end = pointer_str.find('/', pos); + if (token_end == std::string_view::npos) token_end = pointer_str.size(); - assert_at_root(); - _json_iter->return_current_and_advance(); - _json_iter->ascend_to(depth()-1); -} -simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return; } + return pointer_str.substr(pos, token_end - pos); + } +}; - assert_at_non_root_start(); - _json_iter->return_current_and_advance(); - _json_iter->ascend_to(depth()-1); -} +// JSON Pointer accessor +template +struct pointer_accessor { + using parser = json_pointer_parser; + static constexpr std::string_view pointer_view = Pointer.view(); + static constexpr std::size_t token_count = parser::count_tokens(); -simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { - logger::log_error(*_json_iter, start_position(), depth(), message); - return INCORRECT_TYPE; -} + // Validate pointer against struct definition + static consteval bool validate_pointer() { + if constexpr (!std::is_class_v) { + return true; + } -simdjson_inline bool value_iterator::is_at_start() const noexcept { - return position() == start_position(); -} + auto current_type = ^^T; + std::size_t pos = pointer_view[0] == '/' ? 1 : 0; -simdjson_inline bool value_iterator::is_at_key() const noexcept { - // Keys are at the same depth as the object. - // Note here that we could be safer and check that we are within an object, - // but we do not. - return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; -} + while (pos < pointer_view.size()) { + // Extract token up to next / + std::size_t token_end = pointer_view.find('/', pos); + if (token_end == std::string_view::npos) token_end = pointer_view.size(); -simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { - // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). - auto delta = position() - start_position(); - return delta == 1 || delta == 2; -} + std::string_view token = pointer_view.substr(pos, token_end - pos); -inline void value_iterator::assert_at_start() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); - SIMDJSON_ASSUME( _depth > 0 ); -} + if (parser::is_numeric(token)) { + if (!path_accessor::is_array_like_reflected(current_type)) { + return false; + } + current_type = path_accessor::get_element_type_reflected(current_type); + } else { + bool found = false; + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == token) { + current_type = std::meta::type_of(mem); + found = true; + break; + } + } -inline void value_iterator::assert_at_container_start() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); - SIMDJSON_ASSUME( _depth > 0 ); -} + if (!found) return false; + } -inline void value_iterator::assert_at_next() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); - SIMDJSON_ASSUME( _depth > 0 ); -} + pos = token_end + 1; + } -simdjson_inline void value_iterator::move_at_start() noexcept { - _json_iter->_depth = _depth; - _json_iter->token.set_position(_start_position); -} + return true; + } -simdjson_inline void value_iterator::move_at_container_start() noexcept { - _json_iter->_depth = _depth; - _json_iter->token.set_position(_start_position + 1); -} + // Recursive accessor + template + static inline simdjson_result access_impl(simdjson_result current) noexcept { + if constexpr (TokenIndex >= token_count) { + return current; + } else { + constexpr std::string_view token = parser::get_token(TokenIndex); -simdjson_inline simdjson_result value_iterator::reset_array() noexcept { - if(error()) { return error(); } - move_at_container_start(); - return started_array(); -} + if constexpr (parser::is_numeric(token)) { + constexpr std::size_t index = parser::parse_index(token); + auto arr = current.get_array().value_unsafe(); + auto next_value = arr.at(index); + return access_impl(next_value); + } else { + auto obj = current.get_object().value_unsafe(); + auto next_value = obj.find_field_unordered(token); + return access_impl(next_value); + } + } + } -simdjson_inline simdjson_result value_iterator::reset_object() noexcept { - if(error()) { return error(); } - move_at_container_start(); - return started_object(); -} + // Access JSON value at pointer + template + static inline simdjson_result access(DocOrValue& doc_or_val) noexcept { + if constexpr (std::is_class_v) { + constexpr bool pointer_valid = validate_pointer(); + static_assert(pointer_valid, "JSON Pointer does not match struct definition"); + } -inline void value_iterator::assert_at_child() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); - SIMDJSON_ASSUME( _depth > 0 ); -} + if (pointer_view.empty() || pointer_view == "/") { + if constexpr (requires { doc_or_val.get_value(); }) { + return doc_or_val.get_value(); + } else { + return doc_or_val; + } + } -inline void value_iterator::assert_at_root() const noexcept { - assert_at_start(); - SIMDJSON_ASSUME( _depth == 1 ); -} + simdjson_result current = doc_or_val.get_value(); + return access_impl<0>(current); + } -inline void value_iterator::assert_at_non_root_start() const noexcept { - assert_at_start(); - SIMDJSON_ASSUME( _depth > 1 ); -} + // Extract value at pointer directly into target with type validation + template + static inline error_code extract_field(DocOrValue& doc_or_val, FieldType& target) noexcept { + static_assert(std::is_class_v, "extract_field requires T to be a struct type for validation"); -inline void value_iterator::assert_is_valid() const noexcept { - SIMDJSON_ASSUME( _json_iter != nullptr ); -} + constexpr bool pointer_valid = validate_pointer(); + static_assert(pointer_valid, "JSON Pointer does not match struct definition"); -simdjson_inline bool value_iterator::is_valid() const noexcept { - return _json_iter != nullptr; -} + constexpr auto final_type = get_final_type(); + static_assert(final_type == ^^FieldType, "Target type does not match the field type at the pointer"); -simdjson_inline simdjson_result value_iterator::type() const noexcept { - switch (*peek_start()) { - case '{': - return json_type::object; - case '[': - return json_type::array; - case '"': - return json_type::string; - case 'n': - return json_type::null; - case 't': case 'f': - return json_type::boolean; - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return json_type::number; - default: - return TAPE_ERROR; + simdjson_result current_value = doc_or_val.get_value(); + auto json_value = access_impl<0>(current_value); + if (json_value.error()) return json_value.error(); + + return json_value.get(target); } -} -simdjson_inline token_position value_iterator::start_position() const noexcept { - return _start_position; -} +private: + // Get final type by walking pointer through struct + template + static consteval std::enable_if_t, std::meta::info> get_final_type() { + auto current_type = ^^T; + std::size_t pos = pointer_view[0] == '/' ? 1 : 0; -simdjson_inline token_position value_iterator::position() const noexcept { - return _json_iter->position(); -} + while (pos < pointer_view.size()) { + std::size_t token_end = pointer_view.find('/', pos); + if (token_end == std::string_view::npos) token_end = pointer_view.size(); -simdjson_inline token_position value_iterator::end_position() const noexcept { - return _json_iter->end_position(); -} + std::string_view token = pointer_view.substr(pos, token_end - pos); -simdjson_inline token_position value_iterator::last_position() const noexcept { - return _json_iter->last_position(); + if (parser::is_numeric(token)) { + current_type = path_accessor::get_element_type_reflected(current_type); + } else { + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == token) { + current_type = std::meta::type_of(mem); + break; + } + } + } + + pos = token_end + 1; + } + + return current_type; + } +}; + +// Compile-time JSON Pointer accessor with validation +template +inline simdjson_result<::simdjson::fallback::ondemand::value> at_pointer_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = pointer_accessor; + return accessor::access(doc_or_val); } -simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { - return _json_iter->report_error(error, message); +// Overload without type parameter (no validation) +template +inline simdjson_result<::simdjson::fallback::ondemand::value> at_pointer_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = pointer_accessor; + return accessor::access(doc_or_val); } +} // namespace json_path } // namespace ondemand } // namespace fallback } // namespace simdjson -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::value_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/value_iterator-inl.h for fallback */ +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION +#endif // SIMDJSON_GENERIC_ONDEMAND_COMPILE_TIME_ACCESSORS_H +/* end file simdjson/generic/ondemand/compile_time_accessors.h for fallback */ /* end file simdjson/generic/ondemand/amalgamated.h for fallback */ /* including simdjson/fallback/end.h: #include "simdjson/fallback/end.h" */ @@ -54421,7 +63620,7 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } @@ -54445,6 +63644,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + uint64_t((is_backslash | is_quote | is_control).to_bitmask()) + }; +} + } // unnamed namespace } // namespace haswell } // namespace simdjson @@ -54511,7 +63735,7 @@ class value_iterator; /* end file simdjson/generic/ondemand/base.h for haswell */ /* including simdjson/generic/ondemand/deserialize.h for haswell: #include "simdjson/generic/ondemand/deserialize.h" */ /* begin file simdjson/generic/ondemand/deserialize.h for haswell */ -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS #ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -54520,55 +63744,8 @@ class value_iterator; /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include namespace simdjson { -namespace tag_invoke_fn_ns { -void tag_invoke(); - -struct tag_invoke_fn { - template - requires requires(Tag tag, Args &&...args) { - tag_invoke(std::forward(tag), std::forward(args)...); - } - constexpr auto operator()(Tag tag, Args &&...args) const - noexcept(noexcept(tag_invoke(std::forward(tag), - std::forward(args)...))) - -> decltype(tag_invoke(std::forward(tag), - std::forward(args)...)) { - return tag_invoke(std::forward(tag), std::forward(args)...); - } -}; -} // namespace tag_invoke_fn_ns - -inline namespace tag_invoke_ns { -inline constexpr tag_invoke_fn_ns::tag_invoke_fn tag_invoke = {}; -} // namespace tag_invoke_ns - -template -concept tag_invocable = requires(Tag tag, Args... args) { - tag_invoke(std::forward(tag), std::forward(args)...); -}; - -template -concept nothrow_tag_invocable = - tag_invocable && requires(Tag tag, Args... args) { - { - tag_invoke(std::forward(tag), std::forward(args)...) - } noexcept; - }; - -template -using tag_invoke_result = - std::invoke_result; - -template -using tag_invoke_result_t = - std::invoke_result_t; - -template using tag_t = std::decay_t; - - struct deserialize_tag; /// These types are deserializable in a built-in way @@ -54590,7 +63767,7 @@ template concept custom_deserializable = tag_invocable; template -concept deserializable = custom_deserializable || is_builtin_deserializable_v; +concept deserializable = custom_deserializable || is_builtin_deserializable_v || concepts::optional_type; template concept nothrow_custom_deserializable = nothrow_tag_invocable; @@ -54601,28 +63778,44 @@ concept nothrow_deserializable = nothrow_custom_deserializable || is_bu /// Deserialize Tag inline constexpr struct deserialize_tag { + using array_type = haswell::ondemand::array; + using object_type = haswell::ondemand::object; using value_type = haswell::ondemand::value; using document_type = haswell::ondemand::document; using document_reference_type = haswell::ondemand::document_reference; + // Customization Point for array + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(array_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for object + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(object_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + // Customization Point for value template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } // Customization Point for document template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } // Customization Point for document reference template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } @@ -54632,7 +63825,7 @@ inline constexpr struct deserialize_tag { } // namespace simdjson #endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS /* end file simdjson/generic/ondemand/deserialize.h for haswell */ /* including simdjson/generic/ondemand/value_iterator.h for haswell: #include "simdjson/generic/ondemand/value_iterator.h" */ @@ -54974,7 +64167,7 @@ class value_iterator { simdjson_warn_unused simdjson_inline simdjson_result get_root_number(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result is_root_null(bool check_trailing) noexcept; - simdjson_inline error_code error() const noexcept; + simdjson_warn_unused simdjson_inline error_code error() const noexcept; simdjson_inline uint8_t *&string_buf_loc() noexcept; simdjson_inline const json_iterator &json_iter() const noexcept; simdjson_inline json_iterator &json_iter() noexcept; @@ -55058,8 +64251,8 @@ class value_iterator { simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; - simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; - simdjson_inline error_code end_container() noexcept; + simdjson_warn_unused simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container() noexcept; /** * Advance to a place expecting a value (increasing depth). @@ -55069,8 +64262,8 @@ class value_iterator { */ simdjson_inline simdjson_result advance_to_value() noexcept; - simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; - simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; + simdjson_warn_unused simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; + simdjson_warn_unused simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; simdjson_inline bool is_at_start() const noexcept; /** @@ -55107,7 +64300,7 @@ class value_iterator { /** @copydoc error_code json_iterator::end_position() const noexcept; */ simdjson_inline token_position end_position() const noexcept; /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ - simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + simdjson_warn_unused simdjson_inline error_code report_error(error_code error, const char *message) noexcept; friend class document; friend class object; @@ -55144,6 +64337,7 @@ struct simdjson_result : public haswell::impl /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include @@ -55172,13 +64366,14 @@ class value { * * You may use get_double(), get_bool(), get_uint64(), get_int64(), * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * When SIMDJSON_SUPPORTS_CONCEPTS is set, custom types are also supported. * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ template simdjson_inline simdjson_result get() -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -55195,22 +64390,38 @@ class value { * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * If the macro SIMDJSON_SUPPORTS_CONCEPTS is set, then custom types are also supported. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template - simdjson_inline error_code get(T &out) -#if SIMDJSON_SUPPORTS_DESERIALIZATION + simdjson_warn_unused simdjson_inline error_code get(T &out) +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { - #if SIMDJSON_SUPPORTS_DESERIALIZATION + #if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); + } else if constexpr (concepts::optional_type) { + using value_type = typename std::remove_cvref_t::value_type; + + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullopt + return SUCCESS; + } + + if (!out) { + out.emplace(); + } + return get(out.value()); } else { static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " "And you do not seem to have added support for it. Indeed, we have that " @@ -55220,7 +64431,7 @@ class value { static_cast(out); // to get rid of unused errors return UNINITIALIZED; } -#else // SIMDJSON_SUPPORTS_DESERIALIZATION +#else // SIMDJSON_SUPPORTS_CONCEPTS // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -55338,7 +64549,7 @@ class value { * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. */ template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; /** * Cast this JSON value to a "wobbly" string. @@ -55515,12 +64726,14 @@ class value { */ simdjson_inline simdjson_result at(size_t index) noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -55549,7 +64762,8 @@ class value { * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the default because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -55792,6 +65006,14 @@ class value { */ simdjson_inline simdjson_result at_path(std::string_view at_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * Supports wildcard character (*) for arrays or ".*" for objects. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern + */ + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; protected: /** @@ -55859,7 +65081,7 @@ struct simdjson_result : public haswell::implementatio simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -55888,12 +65110,14 @@ struct simdjson_result : public haswell::implementatio simdjson_inline simdjson_result end() & noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -55920,7 +65144,8 @@ struct simdjson_result : public haswell::implementatio * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the defaul because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -55940,7 +65165,22 @@ struct simdjson_result : public haswell::implementatio simdjson_result operator[](int) noexcept = delete; /** - * Get the type of this JSON value. + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * Given a valid JSON document, the answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just @@ -55964,6 +65204,7 @@ struct simdjson_result : public haswell::implementatio simdjson_inline simdjson_result current_depth() const noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; }; } // namespace simdjson @@ -56434,14 +65675,14 @@ class json_iterator { * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ - simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + simdjson_warn_unused simdjson_inline error_code report_error(error_code error, const char *message) noexcept; /** * Log error, but don't stop iteration. * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ - simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; + simdjson_warn_unused simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; /** * Take an input in json containing max_len characters and attempt to copy it over to tmpbuf, a buffer with @@ -56461,7 +65702,7 @@ class json_iterator { simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; - simdjson_inline error_code consume_character(char c) noexcept; + simdjson_warn_unused simdjson_inline error_code consume_character(char c) noexcept; #if SIMDJSON_DEVELOPMENT_CHECKS simdjson_inline token_position start_position(depth_t depth) const noexcept; simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; @@ -56552,6 +65793,7 @@ namespace ondemand { * The type of a JSON value. */ enum class json_type { + unknown=0, // Start at 1 to catch uninitialized / default values more easily array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) object, ///< A JSON object ( { "a": 1, "b" 2, ... } ) @@ -56758,6 +66000,12 @@ class raw_json_string { */ simdjson_inline const char * raw() const noexcept; + /** + * Get the character at index i. This is unchecked. + * [0] when the string is of length 0 returns the final quote ("). + */ + simdjson_inline char operator[](size_t i) const noexcept; + /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done) on target.size() characters, @@ -56897,10 +66145,10 @@ struct simdjson_result : public haswell::imp simdjson_inline ~simdjson_result() noexcept = default; ///< @private simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline char operator[](size_t) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape(haswell::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(haswell::ondemand::json_iterator &iter) const noexcept; }; - } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H @@ -56916,6 +66164,7 @@ struct simdjson_result : public haswell::imp /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include +#include namespace simdjson { namespace haswell { @@ -57034,7 +66283,9 @@ class parser { simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; - /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + /** @overload simdjson_result iterate(padded_string_view json) & noexcept + The string instance might be have its capacity extended. Note that this can still + result in AddressSanitizer: container-overflow in some cases. */ simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; @@ -57122,6 +66373,11 @@ class parser { * Setting batch_size to excessively large or excessively small values may impact negatively the * performance. * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what @@ -57129,10 +66385,10 @@ class parser { * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * - * ### Threads + * This is checked automatically with all iterate_many function calls, except for the two + * that take pointers (const char* or const uint8_t*). * - * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the - * hood to do some lookahead. + * ### Threads * * ### Parser Capacity * @@ -57158,14 +66414,16 @@ class parser { */ inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(padded_string_view json, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) + the string might be automatically padded with up to SIMDJSON_PADDING whitespace characters */ + inline simdjson_result iterate_many(std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe - /** @private We do not want to allow implicit conversion from C string to std::string. */ simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; @@ -57267,13 +66525,39 @@ class parser { bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; #endif + /** + * Get a unique parser instance corresponding to the current thread. + * This instance can be safely used within the current thread, but it should + * not be passed to other threads. + * + * A parser should only be used for one document at a time. + * + * Our simdjson::from functions use this parser instance. + * + * You can free the related parser by calling release_parser(). + */ + static simdjson_inline simdjson_warn_unused ondemand::parser& get_parser(); + /** + * Release the parser instance initialized by get_parser() and all the + * associated resources (memory). Returns true if a parser instance + * was released. + */ + static simdjson_inline bool release_parser(); + private: + friend bool release_parser(); + friend ondemand::parser& get_parser(); + /** Get the thread-local parser instance, allocates it if needed */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_parser_instance(); + /** Get the thread-local parser instance, it might be null */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_threadlocal_parser_if_exists(); /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; std::unique_ptr string_buf{}; + #if SIMDJSON_DEVELOPMENT_CHECKS std::unique_ptr start_positions{}; #endif @@ -57301,6 +66585,314 @@ struct simdjson_result : public haswell::implementati #endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H /* end file simdjson/generic/ondemand/parser.h for haswell */ +// JSON builder - needed for extract_into functionality +/* including simdjson/generic/ondemand/json_string_builder.h for haswell: #include "simdjson/generic/ondemand/json_string_builder.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder.h for haswell */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + + +#if SIMDJSON_SUPPORTS_CONCEPTS + +namespace haswell { +namespace builder { + class string_builder; +}} + +template +struct has_custom_serialization : std::false_type {}; + +inline constexpr struct serialize_tag { + template + constexpr void operator()(haswell::builder::string_builder& b, T&& obj) const{ + return tag_invoke(*this, b, std::forward(obj)); + } + + +} serialize{}; +template +struct has_custom_serialization(), std::declval())) +>> : std::true_type {}; + +template +constexpr bool require_custom_serialization = has_custom_serialization::value; +#else +struct has_custom_serialization : std::false_type {}; +#endif // SIMDJSON_SUPPORTS_CONCEPTS + +namespace haswell { +namespace builder { +/** + * A builder for JSON strings representing documents. This is a low-level + * builder that is not meant to be used directly by end-users. Though it + * supports atomic types (Booleans, strings), it does not support composed + * types (arrays and objects). + * + * Ultimately, this class can support kernel-specific optimizations. E.g., + * it may make use of SIMD instructions to escape strings faster. + */ +class string_builder { +public: + simdjson_inline string_builder(size_t initial_capacity = DEFAULT_INITIAL_CAPACITY); + + static constexpr size_t DEFAULT_INITIAL_CAPACITY = 1024; + + /** + * Append number (includes Booleans). Booleans are mapped to the strings + * false and true. Numbers are converted to strings abiding by the JSON standard. + * Floating-point numbers are converted to the shortest string that 'correctly' + * represents the number. + */ + template::value>::type> + simdjson_inline void append(number_type v) noexcept; + + /** + * Append character c. + */ + simdjson_inline void append(char c) noexcept; + + /** + * Append the string 'null'. + */ + simdjson_inline void append_null() noexcept; + + /** + * Clear the content. + */ + simdjson_inline void clear() noexcept; + + /** + * Append the std::string_view, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append(std::string_view input) noexcept; + + /** + * Append the std::string_view surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(std::string_view input) noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + template + simdjson_inline void escape_and_append_with_quotes() noexcept; +#endif + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(char input) noexcept; + + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(const char* input) noexcept; + + /** + * Append the C string directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *c) noexcept; + + /** + * Append "{" to the buffer. + */ + simdjson_inline void start_object() noexcept; + + /** + * Append "}" to the buffer. + */ + simdjson_inline void end_object() noexcept; + + /** + * Append "[" to the buffer. + */ + simdjson_inline void start_array() noexcept; + + /** + * Append "]" to the buffer. + */ + simdjson_inline void end_array() noexcept; + + /** + * Append "," to the buffer. + */ + simdjson_inline void append_comma() noexcept; + + /** + * Append ":" to the buffer. + */ + simdjson_inline void append_colon() noexcept; + + /** + * Append a key-value pair to the buffer. + * The key is escaped and surrounded by double quotes. + * The value is escaped if it is a string. + */ + template + simdjson_inline void append_key_value(key_type key, value_type value) noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + template + simdjson_inline void append_key_value(value_type value) noexcept; + + // Support for optional types (std::optional, etc.) + template + requires(!require_custom_serialization) + simdjson_inline void append(const T &opt); + + template + requires(require_custom_serialization) + simdjson_inline void append(T &&val); + + // Support for string-like types + template + requires(std::is_convertible::value || + std::is_same::value ) + simdjson_inline void append(const T &value); +#endif +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS + // Support for range-based appending (std::ranges::view, etc.) + template +requires (!std::is_convertible::value && !require_custom_serialization) + simdjson_inline void append(const R &range) noexcept; +#endif + /** + * Append the std::string_view directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(std::string_view input) noexcept; + + /** + * Append len characters from str. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *str, size_t len) noexcept; +#if SIMDJSON_EXCEPTIONS + /** + * Creates an std::string from the written JSON buffer. + * Throws if memory allocation failed + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string() const noexcept(false); + + /** + * Creates an std::string_view from the written JSON buffer. + * Throws if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string_view() const noexcept(false) simdjson_lifetime_bound; +#endif + + /** + * Returns a view on the written JSON buffer. Returns an error + * if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result view() const noexcept; + + /** + * Appends the null character to the buffer and returns + * a pointer to the beginning of the written JSON buffer. + * Returns an error if memory allocation failed. + * The result is null-terminated. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result c_str() noexcept; + + /** + * Return true if the content is valid UTF-8. + */ + simdjson_inline bool validate_unicode() const noexcept; + + /** + * Returns the current size of the written JSON buffer. + * If an error occurred, returns 0. + */ + simdjson_inline size_t size() const noexcept; + +private: + /** + * Returns true if we can write at least upcoming_bytes bytes. + * The underlying buffer is reallocated if needed. It is designed + * to be called before writing to the buffer. It should be fast. + */ + simdjson_inline bool capacity_check(size_t upcoming_bytes); + + /** + * Grow the buffer to at least desired_capacity bytes. + * If the allocation fails, is_valid is set to false. We expect + * that this function would not be repeatedly called. + */ + simdjson_inline void grow_buffer(size_t desired_capacity); + + /** + * We use this helper function to make sure that is_valid is kept consistent. + */ + simdjson_inline void set_valid(bool valid) noexcept; + + std::unique_ptr buffer{}; + size_t position{0}; + size_t capacity{0}; + bool is_valid{true}; +}; + + + +} +} + + +#if !SIMDJSON_STATIC_REFLECTION +// fallback implementation until we have static reflection +template +simdjson_warn_unused simdjson_result to_json(const Z &z, size_t initial_capacity = simdjson::haswell::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + simdjson::haswell::builder::string_builder b(initial_capacity); + b.append(z); + std::string_view s; + auto e = b.view().get(s); + if(e) { return e; } + return std::string(s); +} +template +simdjson_warn_unused simdjson_error to_json(const Z &z, std::string &s, size_t initial_capacity = simdjson::haswell::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + simdjson::haswell::builder::string_builder b(initial_capacity); + b.append(z); + std::string_view sv; + auto e = b.view().get(sv); + if(e) { return e; } + s.assign(sv.data(), sv.size()); + return simdjson::SUCCESS; +} +#endif + +#if SIMDJSON_SUPPORTS_CONCEPTS +#endif // SIMDJSON_SUPPORTS_CONCEPTS + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_STRING_BUILDER_H +/* end file simdjson/generic/ondemand/json_string_builder.h for haswell */ + // All other declarations /* including simdjson/generic/ondemand/array.h for haswell: #include "simdjson/generic/ondemand/array.h" */ /* begin file simdjson/generic/ondemand/array.h for haswell */ @@ -57311,6 +66903,7 @@ struct simdjson_result : public haswell::implementati /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { @@ -57413,7 +67006,7 @@ class array { * JSONPath queries that trivially convertible to JSON Pointer queries: key * names and array indices. * - * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * https://www.rfc-editor.org/rfc/rfc9535 (RFC 9535) * * @return The value associated with the given JSONPath expression, or: * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails @@ -57423,6 +67016,15 @@ class array { */ inline simdjson_result at_path(std::string_view json_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * Supports wildcard patterns like "[*]" to match all array elements. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern + */ + inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; + /** * Consumes the array and returns a string_view instance corresponding to the * array as represented in JSON. It points inside the original document. @@ -57437,11 +67039,42 @@ class array { * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) noexcept; + +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this array as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON array is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_warn_unused simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the array, no matter where you are right now. */ - simdjson_inline error_code consume() noexcept; + simdjson_warn_unused simdjson_inline error_code consume() noexcept; /** * Begin array iteration. @@ -57514,8 +67147,30 @@ struct simdjson_result : public haswell::implementatio simdjson_inline simdjson_result at(size_t index) noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_warn_unused simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -57560,7 +67215,8 @@ class array_iterator { * * Part of the std::iterator interface. */ - simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result + operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. /** * Check if we are at the end of the JSON. * @@ -57584,6 +67240,11 @@ class array_iterator { */ simdjson_inline array_iterator &operator++() noexcept; + /** + * Check if the array is at the end. + */ + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; + private: value_iterator iter{}; @@ -57602,7 +67263,6 @@ namespace simdjson { template<> struct simdjson_result : public haswell::implementation_simdjson_result_base { -public: simdjson_inline simdjson_result(haswell::ondemand::array_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -57615,6 +67275,8 @@ struct simdjson_result : public haswell::impl simdjson_inline bool operator==(const simdjson_result &) const noexcept; simdjson_inline bool operator!=(const simdjson_result &) const noexcept; simdjson_inline simdjson_result &operator++() noexcept; + + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; }; } // namespace simdjson @@ -57631,6 +67293,7 @@ struct simdjson_result : public haswell::impl /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ @@ -57742,7 +67405,7 @@ class document { * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. */ template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; /** * Cast this JSON value to a string. * @@ -57808,7 +67471,7 @@ class document { */ template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -57831,7 +67494,7 @@ class document { */ template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -57849,18 +67512,18 @@ class document { * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. - * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns INCORRECT_TYPE If the JSON value is of the given type. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template - simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION + simdjson_warn_unused simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { @@ -57872,7 +67535,7 @@ class document { static_cast(out); // to get rid of unused errors return UNINITIALIZED; } -#else // SIMDJSON_SUPPORTS_DESERIALIZATION +#else // SIMDJSON_SUPPORTS_CONCEPTS // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -57882,7 +67545,7 @@ class document { " You may also add support for custom types, see our documentation."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS } /** @overload template error_code get(T &out) & noexcept */ @@ -57947,7 +67610,7 @@ class document { * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a raw_json_string. * @@ -57956,7 +67619,7 @@ class document { * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a bool. * @@ -58027,12 +67690,14 @@ class document { simdjson_inline simdjson_result end() & noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -58071,7 +67736,8 @@ class document { * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the default because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -58106,11 +67772,27 @@ class document { * E.g., you must still call "is_null()" to check that a value is null even if * "type()" returns json_type::null. * + * The answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. + * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). * - * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + * Prior to simdjson 4.0, this function would return an error given a bad token. + * Starting with simdjson 4.0, it will return simdjson::ondemand::json_type::unknown. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. */ simdjson_inline simdjson_result type() noexcept; @@ -58310,7 +67992,7 @@ class document { * JSONPath queries that trivially convertible to JSON Pointer queries: key * names and array indices. * - * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * https://www.rfc-editor.org/rfc/rfc9535 (RFC 9535) * * Key values are matched exactly, without unescaping or Unicode normalization. * We do a byte-by-byte comparison. E.g. @@ -58328,17 +68010,64 @@ class document { */ simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * + * Supports wildcard patterns like "$.array[*]" or "$.object.*" to match multiple elements. + * + * This method materializes all matching values into a vector. + * The document will be consumed after this call. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern, or: + * - INVALID_JSON_POINTER if the JSONPath cannot be parsed + * - NO_SUCH_FIELD if a field does not exist + * - INDEX_OUT_OF_BOUNDS if an array index is out of bounds + * - INCORRECT_TYPE if path traversal encounters wrong type + */ + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; + /** * Consumes the document and returns a string_view instance corresponding to the * document as represented in JSON. It points inside the original byte array containing * the JSON document. */ simdjson_inline simdjson_result raw_json() noexcept; + +#if SIMDJSON_STATIC_REFLECTION + /** + * Extract only specific fields from the JSON object into a struct. + * + * This allows selective deserialization of only the fields you need, + * potentially improving performance by skipping unwanted fields. + * + * Example: + * ```cpp + * struct Car { + * std::string make; + * std::string model; + * int year; + * double price; + * }; + * + * Car car; + * doc.extract_into<"make", "model">(car); + * // Only 'make' and 'model' fields are extracted from JSON + * ``` + * + * @tparam FieldNames Compile-time string literals specifying which fields to extract + * @param out The output struct to populate with selected fields + * @returns SUCCESS on success, or an error code if a required field is missing or has wrong type + */ + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION protected: /** * Consumes the document. */ - simdjson_inline error_code consume() noexcept; + simdjson_warn_unused simdjson_inline error_code consume() noexcept; simdjson_inline document(ondemand::json_iterator &&iter) noexcept; simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; @@ -58391,7 +68120,7 @@ class document_reference { simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -58400,7 +68129,7 @@ class document_reference { simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -58413,7 +68142,7 @@ class document_reference { } template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -58435,14 +68164,14 @@ class document_reference { * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template - simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION + simdjson_warn_unused simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { @@ -58454,7 +68183,7 @@ class document_reference { static_cast(out); // to get rid of unused errors return UNINITIALIZED; } -#else // SIMDJSON_SUPPORTS_DESERIALIZATION +#else // SIMDJSON_SUPPORTS_CONCEPTS // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -58464,12 +68193,17 @@ class document_reference { " You may also add support for custom types, see our documentation."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS } /** @overload template error_code get(T &out) & noexcept */ template simdjson_inline error_code get(T &out) && noexcept; simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_STATIC_REFLECTION + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION simdjson_inline operator document&() const noexcept; #if SIMDJSON_EXCEPTIONS template @@ -58510,6 +68244,7 @@ class document_reference { simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; private: document *doc{nullptr}; @@ -58538,7 +68273,7 @@ struct simdjson_result : public haswell::implementa simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -58551,6 +68286,9 @@ struct simdjson_result : public haswell::implementa template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS + + using haswell::implementation_simdjson_result_base::operator*; + using haswell::implementation_simdjson_result_base::operator->; template ::value == false>::type> explicit simdjson_inline operator T() noexcept(false); simdjson_inline operator haswell::ondemand::array() & noexcept(false); @@ -58590,6 +68328,12 @@ struct simdjson_result : public haswell::implementa simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; +#if SIMDJSON_STATIC_REFLECTION + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION }; @@ -58616,7 +68360,7 @@ struct simdjson_result : public haswell:: simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -58667,6 +68411,12 @@ struct simdjson_result : public haswell:: simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; +#if SIMDJSON_STATIC_REFLECTION + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION }; @@ -58759,7 +68509,7 @@ class document_stream { /** * Construct an uninitialized document_stream. * - * ```c++ + * ```cpp * document_stream docs; * auto error = parser.iterate_many(json).get(docs); * ``` @@ -58809,6 +68559,7 @@ class document_stream { * Default constructor. */ simdjson_inline iterator() noexcept; + simdjson_inline iterator(const iterator &other) noexcept = default; /** * Get the current document (or error). */ @@ -58822,6 +68573,7 @@ class document_stream { * @param other the end iterator to compare to. */ simdjson_inline bool operator!=(const iterator &other) const noexcept; + simdjson_inline bool operator==(const iterator &other) const noexcept; /** * @private * @@ -58865,6 +68617,11 @@ class document_stream { */ inline error_code error() const noexcept; + /** + * Returns whether the iterator is at the end. + */ + inline bool at_end() const noexcept; + private: simdjson_inline iterator(document_stream *s, bool finished) noexcept; /** The document_stream we're iterating through. */ @@ -58876,6 +68633,7 @@ class document_stream { friend class document_stream; friend class json_iterator; }; + using iterator = document_stream::iterator; /** * Start iterating the documents in the stream. @@ -59139,6 +68897,10 @@ struct simdjson_result : public haswell::implementatio /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION && SIMDJSON_SUPPORTS_CONCEPTS */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_string_builder.h" // for constevalutil::fixed_string */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { @@ -59160,12 +68922,14 @@ class object { simdjson_inline simdjson_result begin() noexcept; simdjson_inline simdjson_result end() noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -59208,7 +68972,8 @@ class object { * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the default because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -59291,6 +69056,15 @@ class object { */ inline simdjson_result at_path(std::string_view json_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * Supports wildcard patterns like ".*" to match all object fields. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern + */ + inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; + /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you @@ -59336,11 +69110,71 @@ class object { */ simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this object as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON object is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_warn_unused simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } + +#if SIMDJSON_STATIC_REFLECTION + /** + * Extract only specific fields from the JSON object into a struct. + * + * This allows selective deserialization of only the fields you need, + * potentially improving performance by skipping unwanted fields. + * + * Example: + * ```cpp + * struct Car { + * std::string make; + * std::string model; + * int year; + * double price; + * }; + * + * Car car; + * object.extract_into<"make", "model">(car); + * // Only 'make' and 'model' fields are extracted from JSON + * ``` + * + * @tparam FieldNames Compile-time string literals specifying which fields to extract + * @param out The output struct to populate with selected fields + * @returns SUCCESS on success, or an error code if a required field is missing or has wrong type + */ + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the object, no matter where you are right now. */ - simdjson_inline error_code consume() noexcept; + simdjson_warn_unused simdjson_inline error_code consume() noexcept; static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; @@ -59379,12 +69213,43 @@ struct simdjson_result : public haswell::implementati simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; - + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; inline simdjson_result reset() noexcept; inline simdjson_result is_empty() noexcept; inline simdjson_result count_fields() & noexcept; inline simdjson_result raw_json() noexcept; + #if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_warn_unused simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#if SIMDJSON_STATIC_REFLECTION + // TODO: move this code into object-inl.h + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) noexcept { + if (error()) { return error(); } + return first.extract_into(out); + } +#endif // SIMDJSON_STATIC_REFLECTION +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -59513,6 +69378,20 @@ inline simdjson_result to_json_string(simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); + +#if SIMDJSON_STATIC_REFLECTION +/** + * Create a JSON string from any user-defined type using static reflection. + * Only available when SIMDJSON_STATIC_REFLECTION is enabled. + */ +template + requires(!std::same_as && + !std::same_as && + !std::same_as && + !std::same_as) +inline std::string to_json_string(const T& obj); +#endif + } // namespace simdjson /** @@ -59584,28 +69463,30 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result #include +#if SIMDJSON_STATIC_REFLECTION +#include +// #include // for std::define_static_string - header not available yet +#endif namespace simdjson { -template -constexpr bool require_custom_serialization = false; ////////////////////////////// // Number deserialization ////////////////////////////// template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { using limits = std::numeric_limits; @@ -59619,7 +69500,6 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { } template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { double x; SIMDJSON_TRY(val.get_double().get(x)); @@ -59628,7 +69508,6 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { } template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { using limits = std::numeric_limits; @@ -59641,8 +69520,23 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { return SUCCESS; } +////////////////////////////// +// String deserialization +////////////////////////////// + +// just a character! +error_code tag_invoke(deserialize_tag, auto &val, char &out) noexcept { + std::string_view x; + SIMDJSON_TRY(val.get_string().get(x)); + if(x.size() != 1) { + return INCORRECT_TYPE; + } + out = x[0]; + return SUCCESS; +} + +// any string-like type (can be constructed from std::string_view) template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { std::string_view str; SIMDJSON_TRY(val.get_string().get(str)); @@ -59659,7 +69553,6 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothr * doc.get>(). */ template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { using value_type = typename std::remove_cvref_t::value_type; static_assert( @@ -59668,9 +69561,13 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { static_assert( std::is_default_constructible_v, "The specified type inside the container must default constructible."); - haswell::ondemand::array arr; - SIMDJSON_TRY(val.get_array().get(arr)); + if constexpr (std::is_same_v, haswell::ondemand::array>) { + arr = val; + } else { + SIMDJSON_TRY(val.get_array().get(arr)); + } + for (auto v : arr) { if constexpr (concepts::returns_reference) { if (auto const err = v.get().get(concepts::emplace_one(out)); @@ -59701,7 +69598,6 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { * string-keyed types. */ template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { using value_type = typename std::remove_cvref_t::mapped_type; static_assert( @@ -59727,7 +69623,45 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { return SUCCESS; } +template +error_code tag_invoke(deserialize_tag, haswell::ondemand::object &obj, T &out) noexcept { + using value_type = typename std::remove_cvref_t::mapped_type; + + out.clear(); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + + haswell::ondemand::value value_obj; + SIMDJSON_TRY(field.value().get(value_obj)); + + value_type this_value; + SIMDJSON_TRY(value_obj.get(this_value)); + out.emplace(typename T::key_type(key), std::move(this_value)); + } + return SUCCESS; +} + +template +error_code tag_invoke(deserialize_tag, haswell::ondemand::value &val, T &out) noexcept { + haswell::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + +template +error_code tag_invoke(deserialize_tag, haswell::ondemand::document &doc, T &out) noexcept { + haswell::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} +template +error_code tag_invoke(deserialize_tag, haswell::ondemand::document_reference &doc, T &out) noexcept { + haswell::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} /** @@ -59745,7 +69679,6 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { * @return status of the conversion */ template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::element_type, ValT>) { using element_type = typename std::remove_cvref_t::element_type; @@ -59770,17 +69703,17 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser /** * This CPO (Customization Point Object) will help deserialize into optional types. */ -template - requires(!require_custom_serialization) -error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::value_type, ValT>) { +template +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept(nothrow_deserializable::value_type, decltype(val)>) { using value_type = typename std::remove_cvref_t::value_type; - static_assert( - deserializable, - "The specified type inside the unique_ptr must itself be deserializable"); - static_assert( - std::is_default_constructible_v, - "The specified type inside the unique_ptr must default constructible."); + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullopt + return SUCCESS; + } if (!out) { out.emplace(); @@ -59789,10 +69722,329 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser return SUCCESS; } + +#if SIMDJSON_STATIC_REFLECTION + + +template +constexpr bool user_defined_type = (std::is_class_v +&& !std::is_same_v && !std::is_same_v && !concepts::optional_type && +!concepts::appendable_containers); + + +template + requires(user_defined_type && std::is_class_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { + haswell::ondemand::object obj; + if constexpr (std::is_same_v, haswell::ondemand::object>) { + obj = val; + } else { + SIMDJSON_TRY(val.get_object().get(obj)); + } + template for (constexpr auto mem : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + if constexpr (concepts::optional_type) { + // for optional members, it's ok if the key is missing + auto error = obj[key].get(out.[:mem:]); + if (error && error != NO_SUCH_FIELD) { + if(error == NO_SUCH_FIELD) { + out.[:mem:].reset(); + continue; + } + return error; + } + } else { + // for non-optional members, the key must be present + SIMDJSON_TRY(obj[key].get(out.[:mem:])); + } + } + }; + return simdjson::SUCCESS; +} + +// Support for enum deserialization - deserialize from string representation using expand approach from P2996R12 +template + requires(std::is_enum_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { +#if SIMDJSON_STATIC_REFLECTION + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + if (str == std::meta::identifier_of(enum_val)) { + out = [:enum_val:]; + return SUCCESS; + } + }; + + return INCORRECT_TYPE; +#else + // Fallback: deserialize as integer if reflection not available + std::underlying_type_t int_val; + SIMDJSON_TRY(val.get(int_val)); + out = static_cast(int_val); + return SUCCESS; +#endif +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::unique_ptr &out) noexcept { + if (!out) { + out = std::make_unique(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::shared_ptr &out) noexcept { + if (!out) { + out = std::make_shared(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +#endif // SIMDJSON_STATIC_REFLECTION + +//////////////////////////////////////// +// Unique pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Shared pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Explicit optional specializations +//////////////////////////////////////// + +//////////////////////////////////////// +// Explicit smart pointer specializations for string and int types +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_shared(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + int64_t temp; + SIMDJSON_TRY(val.get_int64().get(temp)); + *out = static_cast(temp); + return SUCCESS; +} + } // namespace simdjson #endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS /* end file simdjson/generic/ondemand/std_deserialize.h for haswell */ // Inline definitions @@ -59885,7 +70137,7 @@ simdjson_inline simdjson_result array::begin() noexcept { simdjson_inline simdjson_result array::end() noexcept { return array_iterator(iter); } -simdjson_inline error_code array::consume() noexcept { +simdjson_warn_unused simdjson_warn_unused simdjson_inline error_code array::consume() noexcept { auto error = iter.json_iter().skip_child(iter.depth()-1); if(error) { iter.abandon(); } return error; @@ -59970,6 +70222,67 @@ inline simdjson_result array::at_path(std::string_view json_path) noexcep return at_pointer(json_pointer); } +inline simdjson_result> array::at_path_with_wildcard(std::string_view json_path) noexcept { + std::vector result; + + auto result_pair = get_next_key_and_json_path(json_path); + std::string_view key = result_pair.first; + std::string_view remaining_path = result_pair.second; + // Wildcard case + if(key=="*"){ + for(auto element: *this){ + + if(element.error()){ + return element.error(); + } + + if(remaining_path.empty()){ + // Use value_unsafe() because we've already checked for errors above. + // The 'element' is a simdjson_result wrapper, and we need to extract + // the underlying value. value_unsafe() is safe here because error() returned false. + result.push_back(std::move(element).value_unsafe()); + + }else{ + auto nested_result = element.at_path_with_wildcard(remaining_path); + + if(nested_result.error()){ + return nested_result.error(); + } + // Same logic as above. + std::vector nested_matches = std::move(nested_result).value_unsafe(); + + result.insert(result.end(), + std::make_move_iterator(nested_matches.begin()), + std::make_move_iterator(nested_matches.end())); + } + } + return result; + }else{ + // Specific index case in which we access the element at the given index + size_t idx=0; + + for(char c:key){ + if(c < '0' || c > '9'){ + return INVALID_JSON_POINTER; + } + idx = idx*10 + (c - '0'); + } + + auto element = at(idx); + + if(element.error()){ + return element.error(); + } + + if(remaining_path.empty()){ + result.push_back(std::move(element).value_unsafe()); + return result; + }else{ + return element.at_path_with_wildcard(remaining_path); + } + } +} + simdjson_inline simdjson_result array::at(size_t index) noexcept { size_t i = 0; for (auto value : *this) { @@ -60028,6 +70341,10 @@ simdjson_inline simdjson_result simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path_with_wildcard(json_path); +} simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { if (error()) { return error(); } return first.raw_json(); @@ -60076,6 +70393,9 @@ simdjson_inline array_iterator &array_iterator::operator++() noexcept { return *this; } +simdjson_inline bool array_iterator::at_end() const noexcept { + return iter.at_end(); +} } // namespace ondemand } // namespace haswell } // namespace simdjson @@ -60112,7 +70432,9 @@ simdjson_inline simdjson_result &simdjson_res ++(first); return *this; } - +simdjson_inline bool simdjson_result::at_end() const noexcept { + return !first.iter.is_valid() || first.at_end(); +} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H @@ -60169,7 +70491,7 @@ simdjson_inline simdjson_result value::get_string(bool allow_r return iter.get_string(allow_replacement); } template -simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_warn_unused simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { return iter.get_string(receiver, allow_replacement); } simdjson_inline simdjson_result value::get_wobbly_string() noexcept { @@ -60211,15 +70533,15 @@ template<> simdjson_inline simdjson_result value::get() noexcept { retu template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } -template<> simdjson_inline error_code value::get(array& out) noexcept { return get_array().get(out); } -template<> simdjson_inline error_code value::get(object& out) noexcept { return get_object().get(out); } -template<> simdjson_inline error_code value::get(raw_json_string& out) noexcept { return get_raw_json_string().get(out); } -template<> simdjson_inline error_code value::get(std::string_view& out) noexcept { return get_string(false).get(out); } -template<> simdjson_inline error_code value::get(number& out) noexcept { return get_number().get(out); } -template<> simdjson_inline error_code value::get(double& out) noexcept { return get_double().get(out); } -template<> simdjson_inline error_code value::get(uint64_t& out) noexcept { return get_uint64().get(out); } -template<> simdjson_inline error_code value::get(int64_t& out) noexcept { return get_int64().get(out); } -template<> simdjson_inline error_code value::get(bool& out) noexcept { return get_bool().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(array& out) noexcept { return get_array().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(object& out) noexcept { return get_object().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(raw_json_string& out) noexcept { return get_raw_json_string().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(std::string_view& out) noexcept { return get_string(false).get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(number& out) noexcept { return get_number().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(double& out) noexcept { return get_double().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(uint64_t& out) noexcept { return get_uint64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(int64_t& out) noexcept { return get_int64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(bool& out) noexcept { return get_bool().get(out); } #if SIMDJSON_EXCEPTIONS template @@ -60417,6 +70739,19 @@ simdjson_inline simdjson_result value::at_path(std::string_view json_path } } +inline simdjson_result> value::at_path_with_wildcard(std::string_view json_path) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path_with_wildcard(json_path); + case json_type::object: + return (*this).get_object().at_path_with_wildcard(json_path); + default: + return INVALID_JSON_POINTER; + } +} + } // namespace ondemand } // namespace haswell } // namespace simdjson @@ -60667,6 +71002,14 @@ simdjson_inline simdjson_result simdjson_result> simdjson_result::at_path_with_wildcard( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path_with_wildcard(json_path); +} + } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H @@ -60816,7 +71159,7 @@ simdjson_inline simdjson_result document::get_string(bool allo return get_root_value_iterator().get_root_string(true, allow_replacement); } template -simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_warn_unused simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept { return get_root_value_iterator().get_root_string(receiver, true, allow_replacement); } simdjson_inline simdjson_result document::get_wobbly_string() noexcept { @@ -60842,15 +71185,15 @@ template<> simdjson_inline simdjson_result document::get() & noexcept { template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } -template<> simdjson_inline error_code document::get(array& out) & noexcept { return get_array().get(out); } -template<> simdjson_inline error_code document::get(object& out) & noexcept { return get_object().get(out); } -template<> simdjson_inline error_code document::get(raw_json_string& out) & noexcept { return get_raw_json_string().get(out); } -template<> simdjson_inline error_code document::get(std::string_view& out) & noexcept { return get_string(false).get(out); } -template<> simdjson_inline error_code document::get(double& out) & noexcept { return get_double().get(out); } -template<> simdjson_inline error_code document::get(uint64_t& out) & noexcept { return get_uint64().get(out); } -template<> simdjson_inline error_code document::get(int64_t& out) & noexcept { return get_int64().get(out); } -template<> simdjson_inline error_code document::get(bool& out) & noexcept { return get_bool().get(out); } -template<> simdjson_inline error_code document::get(value& out) & noexcept { return get_value().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(array& out) & noexcept { return get_array().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(object& out) & noexcept { return get_object().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(raw_json_string& out) & noexcept { return get_raw_json_string().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(std::string_view& out) & noexcept { return get_string(false).get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(double& out) & noexcept { return get_double().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(uint64_t& out) & noexcept { return get_uint64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(int64_t& out) & noexcept { return get_int64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(bool& out) & noexcept { return get_bool().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(value& out) & noexcept { return get_value().get(out); } template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } @@ -60870,8 +71213,8 @@ simdjson_inline document::operator object() & noexcept(false) { return get_objec simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline document::operator double() noexcept(false) { return get_double(); } -simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } -simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator std::string_view() noexcept(false) simdjson_lifetime_bound { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) simdjson_lifetime_bound { return get_raw_json_string(); } simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } simdjson_inline document::operator value() noexcept(false) { return get_value(); } @@ -60913,1141 +71256,1961 @@ simdjson_inline simdjson_result document::find_field_unordered(std::strin simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { return start_or_resume_object().find_field_unordered(key); } -simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { - return start_or_resume_object()[key]; +simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { + return start_or_resume_object()[key]; +} + +simdjson_warn_unused simdjson_inline error_code document::consume() noexcept { + bool scalar = false; + auto error = is_scalar().get(scalar); + if(error) { return error; } + if(scalar) { + iter.return_current_and_advance(); + return SUCCESS; + } + error = iter.skip_child(0); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result document::raw_json() noexcept { + auto _iter = get_root_value_iterator(); + const uint8_t * starting_point{_iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter.unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result document::type() noexcept { + return get_root_value_iterator().type(); +} + +simdjson_inline simdjson_result document::is_scalar() noexcept { + // For more speed, we could do: + // return iter.is_single_token(); + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline simdjson_result document::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} + +simdjson_inline bool document::is_negative() noexcept { + return get_root_value_iterator().is_root_negative(); +} + +simdjson_inline simdjson_result document::is_integer() noexcept { + return get_root_value_iterator().is_root_integer(true); +} + +simdjson_inline simdjson_result document::get_number_type() noexcept { + return get_root_value_iterator().get_root_number_type(true); +} + +simdjson_inline simdjson_result document::get_number() noexcept { + return get_root_value_iterator().get_root_number(true); +} + + +simdjson_inline simdjson_result document::raw_json_token() noexcept { + auto _iter = get_root_value_iterator(); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_root_length()); +} + +simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_pointer.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result document::at_path(std::string_view json_path) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_path.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result> document::at_path_with_wildcard(std::string_view json_path) noexcept { + rewind(); // Rewind the document each time at_path_with_wildcard is called + if (json_path.empty()) { + return INVALID_JSON_POINTER; + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path_with_wildcard(json_path); + case json_type::object: + return (*this).get_object().at_path_with_wildcard(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code document::extract_into(T& out) & noexcept { + // Helper to check if a field name matches any of the requested fields + auto should_extract = [](std::string_view field_name) constexpr -> bool { + return ((FieldNames.view() == field_name) || ...); + }; + + // Iterate through all members of T using reflection + template for (constexpr auto mem : std::define_static_array( + std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + + // Only extract this field if it's in our list of requested fields + if constexpr (should_extract(key)) { + // Try to find and extract the field + if constexpr (concepts::optional_type) { + // For optional fields, it's ok if they're missing + auto field_result = find_field_unordered(key); + if (!field_result.error()) { + auto error = field_result.get(out.[:mem:]); + if (error && error != NO_SUCH_FIELD) { + return error; + } + } else if (field_result.error() != NO_SUCH_FIELD) { + return field_result.error(); + } else { + out.[:mem:].reset(); + } + } else { + // For required fields (in the requested list), fail if missing + SIMDJSON_TRY((*this)[key].get(out.[:mem:])); + } + } + } + }; + + return SUCCESS; +} + +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + haswell::ondemand::document &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base( + error + ) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first); +} +template<> simdjson_warn_unused simdjson_inline error_code simdjson_result::get(haswell::ondemand::document &out) & noexcept = delete; +template<> simdjson_warn_unused simdjson_inline error_code simdjson_result::get(haswell::ondemand::document &out) && noexcept { + if (error()) { return error(); } + out = std::forward(first); + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} + +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} + +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} + +simdjson_inline bool simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} + +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} + +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} + +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} + + +#if SIMDJSON_EXCEPTIONS +template ::value == false>::type> +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator haswell::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator haswell::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator haswell::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } -simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { - return start_or_resume_object()[key]; +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } - -simdjson_inline error_code document::consume() noexcept { - bool scalar = false; - auto error = is_scalar().get(scalar); - if(error) { return error; } - if(scalar) { - iter.return_current_and_advance(); - return SUCCESS; - } - error = iter.skip_child(0); - if(error) { iter.abandon(); } - return error; +simdjson_inline simdjson_result::operator haswell::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } +#endif -simdjson_inline simdjson_result document::raw_json() noexcept { - auto _iter = get_root_value_iterator(); - const uint8_t * starting_point{_iter.peek_start()}; - auto error = consume(); - if(error) { return error; } - // After 'consume()', we could be left pointing just beyond the document, but that - // is ok because we are not going to dereference the final pointer position, we just - // use it to compute the length in bytes. - const uint8_t * final_point{iter.unsafe_pointer()}; - return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); } -simdjson_inline simdjson_result document::type() noexcept { - return get_root_value_iterator().type(); +simdjson_inline bool simdjson_result::at_end() const noexcept { + if (error()) { return error(); } + return first.at_end(); } -simdjson_inline simdjson_result document::is_scalar() noexcept { - // For more speed, we could do: - // return iter.is_single_token(); - json_type this_type; - auto error = type().get(this_type); - if(error) { return error; } - return ! ((this_type == json_type::array) || (this_type == json_type::object)); + +simdjson_inline int32_t simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); } -simdjson_inline simdjson_result document::is_string() noexcept { - json_type this_type; - auto error = type().get(this_type); - if(error) { return error; } - return (this_type == json_type::string); +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); } -simdjson_inline bool document::is_negative() noexcept { - return get_root_value_iterator().is_root_negative(); +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); } -simdjson_inline simdjson_result document::is_integer() noexcept { - return get_root_value_iterator().is_root_integer(true); +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); } -simdjson_inline simdjson_result document::get_number_type() noexcept { - return get_root_value_iterator().get_root_number_type(true); +simdjson_inline simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path_with_wildcard(json_path); } -simdjson_inline simdjson_result document::get_number() noexcept { - return get_root_value_iterator().get_root_number(true); +#if SIMDJSON_STATIC_REFLECTION +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code simdjson_result::extract_into(T& out) & noexcept { + if (error()) { return error(); } + return first.extract_into(out); } +#endif // SIMDJSON_STATIC_REFLECTION +} // namespace simdjson -simdjson_inline simdjson_result document::raw_json_token() noexcept { - auto _iter = get_root_value_iterator(); - return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_root_length()); -} -simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { - rewind(); // Rewind the document each time at_pointer is called - if (json_pointer.empty()) { - return this->get_value(); - } - json_type t; - SIMDJSON_TRY(type().get(t)); - switch (t) - { - case json_type::array: - return (*this).get_array().at_pointer(json_pointer); - case json_type::object: - return (*this).get_object().at_pointer(json_pointer); - default: - return INVALID_JSON_POINTER; - } -} +namespace simdjson { +namespace haswell { +namespace ondemand { -simdjson_inline simdjson_result document::at_path(std::string_view json_path) noexcept { - rewind(); // Rewind the document each time at_pointer is called - if (json_path.empty()) { - return this->get_value(); - } - json_type t; - SIMDJSON_TRY(type().get(t)); - switch (t) { - case json_type::array: - return (*this).get_array().at_path(json_path); - case json_type::object: - return (*this).get_object().at_path(json_path); - default: - return INVALID_JSON_POINTER; - } +simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} +simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} +simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } +simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } +simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } +/** + * The document_reference instances are used primarily/solely for streams of JSON + * documents. + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. + * + * However, for streams of JSON documents, we want to be able to start from + * "321" "321" "321" + * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() + * successfully each time. + * + * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: + * this indicates that we allow trailing content. + */ +simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } +simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } +simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } +template +simdjson_warn_unused simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } +simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } +simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } +simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } +simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } +simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_value(); } +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline document_reference::operator T() noexcept(false) { return get(); } +simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } +simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } +simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } +simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } +simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } +#endif +simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } +simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } +simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } +simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } +simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } +simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } +simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_inline simdjson_result document_reference::is_string() noexcept { return doc->is_string(); } +simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } +simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } +simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } +simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } +simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } +simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } +simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } +simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_inline simdjson_result document_reference::at_path(std::string_view json_path) noexcept { return doc->at_path(json_path); } +simdjson_inline simdjson_result> document_reference::at_path_with_wildcard(std::string_view json_path) noexcept { return doc->at_path_with_wildcard(json_path); } +simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} +simdjson_inline document_reference::operator document&() const noexcept { return *doc; } +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code document_reference::extract_into(T& out) & noexcept { + return doc->extract_into(out); } - +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION } // namespace ondemand } // namespace haswell } // namespace simdjson + + namespace simdjson { +simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::document_reference value, error_code error) + noexcept : implementation_simdjson_result_base(std::forward(value), error) {} -simdjson_inline simdjson_result::simdjson_result( - haswell::ondemand::document &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ -} -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base( - error - ) -{ -} -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } -simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { if (error()) { return error(); } return first.at(index); } -simdjson_inline error_code simdjson_result::rewind() noexcept { +simdjson_inline error_code simdjson_result::rewind() noexcept { if (error()) { return error(); } first.rewind(); return SUCCESS; } -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_inline simdjson_result simdjson_result::end() & noexcept { +simdjson_inline simdjson_result simdjson_result::end() & noexcept { return {}; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { if (error()) { return error(); } return first.get_array(); } -simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { if (error()) { return error(); } return first.get_object(); } -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_warn_unused simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } -simdjson_inline simdjson_result simdjson_result::get_value() noexcept { +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { if (error()) { return error(); } return first.get_value(); } -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +template <> +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(haswell::ondemand::document_reference &out) & noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} +template <> +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(haswell::ondemand::document_reference &out) && noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline simdjson_result::operator T() noexcept(false) { + static_assert(std::is_same::value == false, "You should not call get when T is a document"); + static_assert(std::is_same::value == false, "You should not call get when T is a document"); + if (error()) { throw simdjson_error(error()); } + return first.get(); +} +simdjson_inline simdjson_result::operator haswell::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator haswell::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator haswell::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator haswell::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} +simdjson_inline simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path_with_wildcard(json_path); +} +#if SIMDJSON_STATIC_REFLECTION +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code simdjson_result::extract_into(T& out) & noexcept { + if (error()) { return error(); } + return first.extract_into(out); +} +#endif // SIMDJSON_STATIC_REFLECTION +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H +/* end file simdjson/generic/ondemand/document-inl.h for haswell */ +/* including simdjson/generic/ondemand/document_stream-inl.h for haswell: #include "simdjson/generic/ondemand/document_stream-inl.h" */ +/* begin file simdjson/generic/ondemand/document_stream-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace haswell { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. + std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); +} + +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); +} + +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). + if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); +} + + +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. + can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } +} + +inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); +} + +#endif // SIMDJSON_THREADS_ENABLED + +simdjson_inline document_stream::document_stream( + ondemand::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size, + bool _allow_comma_separated +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? MINIMAL_BATCH_SIZE : _batch_size}, + allow_comma_separated{_allow_comma_separated}, + error{SUCCESS} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change + #endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif +} -template -simdjson_inline simdjson_result simdjson_result::get() & noexcept { - if (error()) { return error(); } - return first.get(); -} -template -simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first).get(); -} -template -simdjson_inline error_code simdjson_result::get(T &out) & noexcept { - if (error()) { return error(); } - return first.get(out); -} -template -simdjson_inline error_code simdjson_result::get(T &out) && noexcept { - if (error()) { return error(); } - return std::forward(first).get(out); +simdjson_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + allow_comma_separated{false}, + error{UNINITIALIZED} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) + #endif +{ } -template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; -template<> simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first); -} -template<> simdjson_inline error_code simdjson_result::get(haswell::ondemand::document &out) & noexcept = delete; -template<> simdjson_inline error_code simdjson_result::get(haswell::ondemand::document &out) && noexcept { - if (error()) { return error(); } - out = std::forward(first); - return SUCCESS; +simdjson_inline document_stream::~document_stream() noexcept +{ + #ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); + #endif } -simdjson_inline simdjson_result simdjson_result::type() noexcept { - if (error()) { return error(); } - return first.type(); +inline size_t document_stream::size_in_bytes() const noexcept { + return len; } -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { - if (error()) { return error(); } - return first.is_scalar(); +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; } -simdjson_inline simdjson_result simdjson_result::is_string() noexcept { - if (error()) { return error(); } - return first.is_string(); +simdjson_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { } -simdjson_inline bool simdjson_result::is_negative() noexcept { - if (error()) { return error(); } - return first.is_negative(); +simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { } -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { - if (error()) { return error(); } - return first.is_integer(); +simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { + return simdjson_result(stream->doc, stream->error); } -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { - if (error()) { return error(); } - return first.get_number_type(); +simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) + // + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. + // + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->error() is guarded against error conditions + // (it will immediately return if stream->error casts to false). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. + return *this; } -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { - if (error()) { return error(); } - return first.get_number(); +simdjson_inline bool document_stream::iterator::at_end() const noexcept { + return finished; } -#if SIMDJSON_EXCEPTIONS -template ::value == false>::type> -simdjson_inline simdjson_result::operator T() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator haswell::ondemand::array() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator haswell::ondemand::object() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator double() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; } -simdjson_inline simdjson_result::operator haswell::ondemand::raw_json_string() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline bool document_stream::iterator::operator==(const document_stream::iterator &other) const noexcept { + return finished == other.finished; } -simdjson_inline simdjson_result::operator bool() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished. + return iterator(this, error == EMPTY); } -simdjson_inline simdjson_result::operator haswell::ondemand::value() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); } -#endif +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->allocate(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } + doc_index = batch_start; + doc = document(json_iterator(&buf[batch_start], parser)); + doc.iter._streaming = true; -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { - if (error()) { return error(); } - return first.current_location(); + #ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread on next batch if needed + error = stage1_thread_parser.allocate(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } + } + #endif // SIMDJSON_THREADS_ENABLED } -simdjson_inline bool simdjson_result::at_end() const noexcept { - if (error()) { return error(); } - return first.at_end(); -} +inline void document_stream::next() noexcept { + // We always enter at once once in an error condition. + if (error) { return; } + next_document(); + if (error) { return; } + auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); + doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; + // Check if at end of structural indexes (i.e. at end of batch) + if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { + error = EMPTY; + // Load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + #ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } + #else + error = run_stage1(*parser, batch_start); + #endif + /** + * Whenever we move to another window, we need to update all pointers to make + * it appear as if the input buffer started at the beginning of the window. + * + * Take this input: + * + * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] + * + * Say you process the following window... + * + * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' + * + * When you do so, the json_iterator has a pointer at the beginning of the memory region + * (pointing at the beginning of '{"z"...'. + * + * When you move to the window that starts at... + * + * '[7, 10, 9] [15, 11, 12, 13] ... + * + * then it is not sufficient to just run stage 1. You also need to re-anchor the + * json_iterator so that it believes we are starting at '[7, 10, 9]...'. + * + * Under the DOM front-end, this gets done automatically because the parser owns + * the pointer the data, and when you call stage1 and then stage2 on the same + * parser, then stage2 will run on the pointer acquired by stage1. + * + * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that + * we used. But json_iterator has no callback when stage1 is called on the parser. + * In fact, I think that the parser is unaware of json_iterator. + * + * + * So we need to re-anchor the json_iterator after each call to stage 1 so that + * all of the pointers are in sync. + */ + doc.iter = json_iterator(&buf[batch_start], parser); + doc.iter._streaming = true; + /** + * End of resync. + */ -simdjson_inline int32_t simdjson_result::current_depth() const noexcept { - if (error()) { return error(); } - return first.current_depth(); + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. + doc_index = batch_start; + } + } } -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { - if (error()) { return error(); } - return first.raw_json_token(); +inline void document_stream::next_document() noexcept { + // Go to next place where depth=0 (document depth) + error = doc.iter.skip_child(0); + if (error) { return; } + // Always set depth=1 at the start of document + doc.iter._depth = 1; + // consume comma if comma separated is allowed + if (allow_comma_separated) { + error_code ignored = doc.iter.consume_character(','); + static_cast(ignored); // ignored on purpose + } + // Resets the string buffer at the beginning, thus invalidating the strings. + doc.iter._string_buf_loc = parser->string_buf.get(); + doc.iter._root = doc.iter.position(); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; } -simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { - if (error()) { return error(); } - return first.at_path(json_path); +inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { + // This code only updates the structural index in the parser, it does not update any json_iterator + // instance. + size_t remaining = len - _batch_start; + if (remaining <= batch_size) { + return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); + } else { + return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); + } } -} // namespace simdjson - +simdjson_inline size_t document_stream::iterator::current_index() const noexcept { + return stream->doc_index; +} -namespace simdjson { -namespace haswell { -namespace ondemand { +simdjson_inline std::string_view document_stream::iterator::source() const noexcept { + auto depth = stream->doc.iter.depth(); + auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); -simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} -simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} -simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } -simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } -simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } -/** - * The document_reference instances are used primarily/solely for streams of JSON - * documents. - * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should - * give an error, so we check for trailing content. - * - * However, for streams of JSON documents, we want to be able to start from - * "321" "321" "321" - * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() - * successfully each time. - * - * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: - * this indicates that we allow trailing content. - */ -simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } -simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } -simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } -simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } -simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } -simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } -simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } -template -simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } -simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } -simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } -simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } -simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } -simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_array(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_object(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_raw_json_string(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_string(false); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_double(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_uint64(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_int64(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_bool(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_value(); } -#if SIMDJSON_EXCEPTIONS -template -simdjson_inline document_reference::operator T() noexcept(false) { return get(); } -simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } -simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } -simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } -simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } -simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } -simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } -simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } -simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } -simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } -#endif -simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } -simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } -simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } -simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } -simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } -simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } -simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } -simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } -simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } -simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } -simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } -simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } -simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } -simdjson_inline simdjson_result document_reference::is_string() noexcept { return doc->is_string(); } -simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } -simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } -simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } -simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } -simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } -simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } -simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } -simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } -simdjson_inline simdjson_result document_reference::at_path(std::string_view json_path) noexcept { return doc->at_path(json_path); } -simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} -simdjson_inline document_reference::operator document&() const noexcept { return *doc; } + // If at root, process the first token to determine if scalar value + if (stream->doc.iter.at_root()) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': // Depth=1 already at start of document + break; + case '}': case ']': + depth--; + break; + default: // Scalar value document + // TODO: We could remove trailing whitespaces + // This returns a string spanning from start of value to the beginning of the next document (excluded) + { + auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; + // normally the length would be next_index - current_index() - 1, except for the last document + size_t svlen = next_index - current_index(); + const char *start = reinterpret_cast(stream->buf) + current_index(); + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { + svlen--; + } + return std::string_view(start, svlen); + } + } + cur_struct_index++; + } -} // namespace ondemand -} // namespace haswell -} // namespace simdjson + while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': + depth++; + break; + case '}': case ']': + depth--; + break; + } + if (depth == 0) { break; } + cur_struct_index++; + } + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; +} +inline error_code document_stream::iterator::error() const noexcept { + return stream->error; +} -namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::document_reference value, error_code error) - noexcept : implementation_simdjson_result_base(std::forward(value), error) {} +#ifdef SIMDJSON_THREADS_ENABLED +inline void document_stream::load_from_stage1_thread() noexcept { + worker->finish(); + // Swap to the parser that was loaded up in the thread. Make sure the parser has + // enough memory to swap to, as well. + std::swap(stage1_thread_parser,*parser); + error = stage1_thread_error; + if (error) { return; } -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { - if (error()) { return error(); } - return first.count_elements(); -} -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { - if (error()) { return error(); } - return first.count_fields(); -} -simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { - if (error()) { return error(); } - return first.at(index); -} -simdjson_inline error_code simdjson_result::rewind() noexcept { - if (error()) { return error(); } - first.rewind(); - return SUCCESS; -} -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { - if (error()) { return error(); } - return first.begin(); -} -simdjson_inline simdjson_result simdjson_result::end() & noexcept { - return {}; -} -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); -} -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); -} -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { - if (error()) { return error(); } - return first[key]; -} -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { - if (error()) { return error(); } - return first[key]; -} -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); -} -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); -} -simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { - if (error()) { return error(); } - return first.get_array(); + // If there's anything left, start the stage 1 thread! + if (next_batch_start() < len) { + start_stage1_thread(); + } } -simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { - if (error()) { return error(); } - return first.get_object(); + +inline void document_stream::start_stage1_thread() noexcept { + // we call the thread on a lambda that will update + // this->stage1_thread_error + // there is only one thread that may write to this value + // TODO this is NOT exception-safe. + this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); + + worker->run(this, & this->stage1_thread_parser, _next_batch_start); } -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { - if (error()) { return error(); } - return first.get_uint64(); + +#endif // SIMDJSON_THREADS_ENABLED + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ } -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { - if (error()) { return error(); } - return first.get_uint64_in_string(); +simdjson_inline simdjson_result::simdjson_result( + haswell::ondemand::document_stream &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ } -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { - if (error()) { return error(); } - return first.get_int64(); + } -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { - if (error()) { return error(); } - return first.get_int64_in_string(); + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H +/* end file simdjson/generic/ondemand/document_stream-inl.h for haswell */ +/* including simdjson/generic/ondemand/field-inl.h for haswell: #include "simdjson/generic/ondemand/field-inl.h" */ +/* begin file simdjson/generic/ondemand/field-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +// clang 6 does not think the default constructor can be noexcept, so we make it explicit +simdjson_inline field::field() noexcept : std::pair() {} + +simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept + : std::pair(key, std::forward(value)) +{ } -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { - if (error()) { return error(); } - return first.get_double(); + +simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { + raw_json_string key; + SIMDJSON_TRY( parent_iter.field_key().get(key) ); + SIMDJSON_TRY( parent_iter.field_value() ); + return field::start(parent_iter, key); } -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { - if (error()) { return error(); } - return first.get_double_in_string(); + +simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { + return field(key, parent_iter.child()); } -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.get_string(allow_replacement); + +simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); + first.consume(); + return answer; } + template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.get_string(receiver, allow_replacement); -} -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { - if (error()) { return error(); } - return first.get_wobbly_string(); +simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { + std::string_view key; + SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); + receiver = key; + return SUCCESS; } -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { - if (error()) { return error(); } - return first.get_raw_json_string(); + +simdjson_inline raw_json_string field::key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return first; } -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { - if (error()) { return error(); } - return first.get_bool(); + + +simdjson_inline std::string_view field::key_raw_json_token() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return std::string_view(reinterpret_cast(first.buf-1), second.iter._json_iter->token.peek(-1) - first.buf + 1); } -simdjson_inline simdjson_result simdjson_result::get_value() noexcept { - if (error()) { return error(); } - return first.get_value(); + +simdjson_inline std::string_view field::escaped_key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + auto end_quote = second.iter._json_iter->token.peek(-1); + while(*end_quote != '"') end_quote--; + return std::string_view(reinterpret_cast(first.buf), end_quote - first.buf); } -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { - if (error()) { return error(); } - return first.is_null(); + +simdjson_inline value &field::value() & noexcept { + return second; } -template -simdjson_inline simdjson_result simdjson_result::get() & noexcept { - if (error()) { return error(); } - return first.get(); + +simdjson_inline value field::value() && noexcept { + return std::forward(*this).second; } -template -simdjson_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first).get(); + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + haswell::ondemand::field &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ } -template -simdjson_inline error_code simdjson_result::get(T &out) & noexcept { - if (error()) { return error(); } - return first.get(out); +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ } -template -simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + +simdjson_inline simdjson_result simdjson_result::key() noexcept { if (error()) { return error(); } - return std::forward(first).get(out); + return first.key(); } -simdjson_inline simdjson_result simdjson_result::type() noexcept { + +simdjson_inline simdjson_result simdjson_result::key_raw_json_token() noexcept { if (error()) { return error(); } - return first.type(); + return first.key_raw_json_token(); } -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + +simdjson_inline simdjson_result simdjson_result::escaped_key() noexcept { if (error()) { return error(); } - return first.is_scalar(); + return first.escaped_key(); } -simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + +simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { if (error()) { return error(); } - return first.is_string(); + return first.unescaped_key(allow_replacement); } -template <> -simdjson_inline error_code simdjson_result::get(haswell::ondemand::document_reference &out) & noexcept { + +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { if (error()) { return error(); } - out = first; - return SUCCESS; + return first.unescaped_key(receiver, allow_replacement); } -template <> -simdjson_inline error_code simdjson_result::get(haswell::ondemand::document_reference &out) && noexcept { + +simdjson_inline simdjson_result simdjson_result::value() noexcept { if (error()) { return error(); } - out = first; - return SUCCESS; + return std::move(first.value()); } -simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { - if (error()) { return error(); } - return first.is_negative(); + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +/* end file simdjson/generic/ondemand/field-inl.h for haswell */ +/* including simdjson/generic/ondemand/json_iterator-inl.h for haswell: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/json_iterator-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept + : token(std::forward(other.token)), + parser{other.parser}, + _string_buf_loc{other._string_buf_loc}, + error{other.error}, + _depth{other._depth}, + _root{other._root}, + _streaming{other._streaming} +{ + other.parser = nullptr; } -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { - if (error()) { return error(); } - return first.is_integer(); +simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { + token = other.token; + parser = other.parser; + _string_buf_loc = other._string_buf_loc; + error = other.error; + _depth = other._depth; + _root = other._root; + _streaming = other._streaming; + other.parser = nullptr; + return *this; } -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { - if (error()) { return error(); } - return first.get_number_type(); + +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{false} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif } -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { - if (error()) { return error(); } - return first.get_number(); + +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{streaming} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif } -#if SIMDJSON_EXCEPTIONS -template -simdjson_inline simdjson_result::operator T() noexcept(false) { - static_assert(std::is_same::value == false, "You should not call get when T is a document"); - static_assert(std::is_same::value == false, "You should not call get when T is a document"); - if (error()) { throw simdjson_error(error()); } - return first.get(); +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + +inline void json_iterator::rewind() noexcept { + token.set_position( root_position() ); + logger::log_headers(); // We start again + _string_buf_loc = parser->string_buf.get(); + _depth = 1; } -simdjson_inline simdjson_result::operator haswell::ondemand::array() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +inline bool json_iterator::balanced() const noexcept { + token_iterator ti(token); + int32_t count{0}; + ti.set_position( root_position() ); + while(ti.peek() <= peek_last()) { + switch (*ti.return_current_and_advance()) + { + case '[': case '{': + count++; + break; + case ']': case '}': + count--; + break; + default: + break; + } + } + return count == 0; } -simdjson_inline simdjson_result::operator haswell::ondemand::object() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and parent_depth, which is a desired effect. The warning does not show up if the +// skip_child() function is not marked inline). +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { + if (depth() <= parent_depth) { return SUCCESS; } + switch (*return_current_and_advance()) { + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + + // For the first open array/object in a value, we've already incremented depth, so keep it the same + // We never stop at colon, but if we did, it wouldn't affect depth + case '[': case '{': case ':': + logger::log_start_value(*this, "skip"); + break; + // If there is a comma, we have just finished a value in an array/object, and need to get back in + case ',': + logger::log_value(*this, "skip"); + break; + // ] or } means we just finished a value and need to jump out of the array/object + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } +#if SIMDJSON_CHECK_EOF + // If there are no more tokens, the parent is incomplete. + if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + break; + case '"': + if(*peek() == ':') { + // We are at a key!!! + // This might happen if you just started an object and you skip it immediately. + // Performance note: it would be nice to get rid of this check as it is somewhat + // expensive. + // https://github.com/simdjson/simdjson/issues/1742 + logger::log_value(*this, "key"); + return_current_and_advance(); // eat up the ':' + break; // important!!! + } + simdjson_fallthrough; + // Anything else must be a scalar value + default: + // For the first scalar, we will have incremented depth already, so we decrement it here. + logger::log_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + } + + // Now that we've considered the first value, we only increment/decrement for arrays/objects + while (position() < end_position()) { + switch (*return_current_and_advance()) { + case '[': case '{': + logger::log_start_value(*this, "skip"); + _depth++; + break; + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + default: + logger::log_value(*this, "skip", ""); + break; + } + } + + return report_error(TAPE_ERROR, "not enough close braces"); } -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool json_iterator::at_root() const noexcept { + return position() == root_position(); } -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline bool json_iterator::is_single_token() const noexcept { + return parser->implementation->n_structural_indexes == 1; } -simdjson_inline simdjson_result::operator double() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline bool json_iterator::streaming() const noexcept { + return _streaming; } -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline token_position json_iterator::root_position() const noexcept { + return _root; } -simdjson_inline simdjson_result::operator haswell::ondemand::raw_json_string() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); } -simdjson_inline simdjson_result::operator bool() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline void json_iterator::assert_at_root() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument + // has side effects that will be discarded. + SIMDJSON_ASSUME( token.position() == _root ); +#endif } -simdjson_inline simdjson_result::operator haswell::ondemand::value() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { + assert_valid_position(token._position + required_tokens - 1); } -#endif -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { - if (error()) { return error(); } - return first.current_location(); +simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); + SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#else + (void)position; // Suppress unused parameter warning +#endif } -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { - if (error()) { return error(); } - return first.raw_json_token(); +simdjson_inline bool json_iterator::at_end() const noexcept { + return position() == end_position(); +} +simdjson_inline token_position json_iterator::end_position() const noexcept { + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + return &parser->implementation->structural_indexes[n_structural_indexes]; } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); +inline std::string json_iterator::to_string() const noexcept { + if( !is_alive() ) { return "dead json_iterator instance"; } + const char * current_structural = reinterpret_cast(token.peek()); + return std::string("json_iterator [ depth : ") + std::to_string(_depth) + + std::string(", structural : '") + std::string(current_structural,1) + + std::string("', offset : ") + std::to_string(token.current_offset()) + + std::string("', error : ") + error_message(error) + + std::string(" ]"); } -simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { - if (error()) { - return error(); +inline simdjson_result json_iterator::current_location() const noexcept { + if (!is_alive()) { // Unrecoverable error + if (!at_root()) { + return reinterpret_cast(token.peek(-1)); + } else { + return reinterpret_cast(token.peek()); + } } - return first.at_path(json_path); + if (at_end()) { + return OUT_OF_BOUNDS; + } + return reinterpret_cast(token.peek()); } -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H -/* end file simdjson/generic/ondemand/document-inl.h for haswell */ -/* including simdjson/generic/ondemand/document_stream-inl.h for haswell: #include "simdjson/generic/ondemand/document_stream-inl.h" */ -/* begin file simdjson/generic/ondemand/document_stream-inl.h for haswell */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -#include -#include +simdjson_inline bool json_iterator::is_alive() const noexcept { + return parser; +} -namespace simdjson { -namespace haswell { -namespace ondemand { +simdjson_inline void json_iterator::abandon() noexcept { + parser = nullptr; + _depth = 0; +} -#ifdef SIMDJSON_THREADS_ENABLED +simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif // SIMDJSON_CHECK_EOF + return token.return_current_and_advance(); +} -inline void stage1_worker::finish() { - // After calling "run" someone would call finish() to wait - // for the end of the processing. - // This function will wait until either the thread has done - // the processing or, else, the destructor has been called. - std::unique_lock lock(locking_mutex); - cond_var.wait(lock, [this]{return has_work == false;}); +simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { + // deliberately done without safety guard: + return token.peek(); } -inline stage1_worker::~stage1_worker() { - // The thread may never outlive the stage1_worker instance - // and will always be stopped/joined before the stage1_worker - // instance is gone. - stop_thread(); +simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // SIMDJSON_CHECK_EOF + return token.peek(delta); } -inline void stage1_worker::start_thread() { - std::unique_lock lock(locking_mutex); - if(thread.joinable()) { - return; // This should never happen but we never want to create more than one thread. - } - thread = std::thread([this]{ - while(true) { - std::unique_lock thread_lock(locking_mutex); - // We wait for either "run" or "stop_thread" to be called. - cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); - // If, for some reason, the stop_thread() method was called (i.e., the - // destructor of stage1_worker is called, then we want to immediately destroy - // the thread (and not do any more processing). - if(!can_work) { - break; - } - this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, - this->_next_batch_start); - this->has_work = false; - // The condition variable call should be moved after thread_lock.unlock() for performance - // reasons but thread sanitizers may report it as a data race if we do. - // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock - cond_var.notify_one(); // will notify "finish" - thread_lock.unlock(); - } - } - ); +simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // #if SIMDJSON_CHECK_EOF + return token.peek_length(delta); } +simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { + // todo: currently we require end-of-string buffering, but the following + // assert_valid_position should be turned on if/when we lift that condition. + // assert_valid_position(position); + // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF + // is ON by default, we have no choice but to disable it for real with a comment. + return token.peek(position); +} -inline void stage1_worker::stop_thread() { - std::unique_lock lock(locking_mutex); - // We have to make sure that all locks can be released. - can_work = false; - has_work = false; - cond_var.notify_all(); - lock.unlock(); - if(thread.joinable()) { - thread.join(); - } +simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_length(position); +} +simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_root_length(position); } -inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { - std::unique_lock lock(locking_mutex); - owner = ds; - _next_batch_start = next_batch_start; - stage1_thread_parser = stage1; - has_work = true; - // The condition variable call should be moved after thread_lock.unlock() for performance - // reasons but thread sanitizers may report it as a data race if we do. - // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock - cond_var.notify_one(); // will notify the thread lock that we have work - lock.unlock(); +simdjson_inline token_position json_iterator::last_position() const noexcept { + // The following line fails under some compilers... + // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); + // since it has side-effects. + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + SIMDJSON_ASSUME(n_structural_indexes > 0); + return &parser->implementation->structural_indexes[n_structural_indexes - 1]; +} +simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { + return token.peek(last_position()); } -#endif // SIMDJSON_THREADS_ENABLED +simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { + SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); + SIMDJSON_ASSUME(_depth == parent_depth + 1); + _depth = parent_depth; +} -simdjson_inline document_stream::document_stream( - ondemand::parser &_parser, - const uint8_t *_buf, - size_t _len, - size_t _batch_size, - bool _allow_comma_separated -) noexcept - : parser{&_parser}, - buf{_buf}, - len{_len}, - batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? MINIMAL_BATCH_SIZE : _batch_size}, - allow_comma_separated{_allow_comma_separated}, - error{SUCCESS} - #ifdef SIMDJSON_THREADS_ENABLED - , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change - #endif -{ -#ifdef SIMDJSON_THREADS_ENABLED - if(worker.get() == nullptr) { - error = MEMALLOC; - } -#endif +simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); + _depth = child_depth; } -simdjson_inline document_stream::document_stream() noexcept - : parser{nullptr}, - buf{nullptr}, - len{0}, - batch_size{0}, - allow_comma_separated{false}, - error{UNINITIALIZED} - #ifdef SIMDJSON_THREADS_ENABLED - , use_thread(false) - #endif -{ +simdjson_inline depth_t json_iterator::depth() const noexcept { + return _depth; } -simdjson_inline document_stream::~document_stream() noexcept -{ - #ifdef SIMDJSON_THREADS_ENABLED - worker.reset(); - #endif +simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { + return _string_buf_loc; } -inline size_t document_stream::size_in_bytes() const noexcept { - return len; +simdjson_warn_unused simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); + logger::log_error(*this, message); + error = _error; + return error; } -inline size_t document_stream::truncated_bytes() const noexcept { - if(error == CAPACITY) { return len - batch_start; } - return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; +simdjson_inline token_position json_iterator::position() const noexcept { + return token.position(); } -simdjson_inline document_stream::iterator::iterator() noexcept - : stream{nullptr}, finished{true} { +simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape(in, _string_buf_loc, allow_replacement); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else + return parser->unescape(in, _string_buf_loc, allow_replacement); +#endif } -simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept - : stream{_stream}, finished{is_end} { +simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape_wobbly(in, _string_buf_loc); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else + return parser->unescape_wobbly(in, _string_buf_loc); +#endif } -simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { - return simdjson_result(stream->doc, stream->error); +simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); +#if SIMDJSON_DEVELOPMENT_CHECKS +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); + SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); +#endif +#endif + token.set_position(position); + _depth = child_depth; } -simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { - // If there is an error, then we want the iterator - // to be finished, no matter what. (E.g., we do not - // keep generating documents with errors, or go beyond - // a document with errors.) - // - // Users do not have to call "operator*()" when they use operator++, - // so we need to end the stream in the operator++ function. - // - // Note that setting finished = true is essential otherwise - // we would enter an infinite loop. - if (stream->error) { finished = true; } - // Note that stream->error() is guarded against error conditions - // (it will immediately return if stream->error casts to false). - // In effect, this next function does nothing when (stream->error) - // is true (hence the risk of an infinite loop). - stream->next(); - // If that was the last document, we're finished. - // It is the only type of error we do not want to appear - // in operator*. - if (stream->error == EMPTY) { finished = true; } - // If we had any other kind of error (not EMPTY) then we want - // to pass it along to the operator* and we cannot mark the result - // as "finished" just yet. - return *this; +simdjson_warn_unused simdjson_inline error_code json_iterator::consume_character(char c) noexcept { + if (*peek() == c) { + return_current_and_advance(); + return SUCCESS; + } + return TAPE_ERROR; } -simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { - return finished != other.finished; +#if SIMDJSON_DEVELOPMENT_CHECKS + +simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; } -simdjson_inline document_stream::iterator document_stream::begin() noexcept { - start(); - // If there are no documents, we're finished. - return iterator(this, error == EMPTY); +simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } } -simdjson_inline document_stream::iterator document_stream::end() noexcept { - return iterator(this, true); +#endif + + +simdjson_warn_unused simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); + logger::log_error(*this, message); + return _error; } -inline void document_stream::start() noexcept { - if (error) { return; } - error = parser->allocate(batch_size); - if (error) { return; } - // Always run the first stage 1 parse immediately - batch_start = 0; - error = run_stage1(*parser, batch_start); - while(error == EMPTY) { - // In exceptional cases, we may start with an empty block - batch_start = next_batch_start(); - if (batch_start >= len) { return; } - error = run_stage1(*parser, batch_start); - } - if (error) { return; } - doc_index = batch_start; - doc = document(json_iterator(&buf[batch_start], parser)); - doc.iter._streaming = true; - #ifdef SIMDJSON_THREADS_ENABLED - if (use_thread && next_batch_start() < len) { - // Kick off the first thread on next batch if needed - error = stage1_thread_parser.allocate(batch_size); - if (error) { return; } - worker->start_thread(); - start_stage1_thread(); - if (error) { return; } +simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { + // This function is not expected to be called in performance-sensitive settings. + // Let us guard against silly cases: + if((N < max_len) || (N == 0)) { return false; } + // Copy to the buffer. + std::memcpy(tmpbuf, json, max_len); + if(N > max_len) { // We pad whatever remains with ' '. + std::memset(tmpbuf + max_len, ' ', N - max_len); } - #endif // SIMDJSON_THREADS_ENABLED + return true; } -inline void document_stream::next() noexcept { - // We always enter at once once in an error condition. - if (error) { return; } - next_document(); - if (error) { return; } - auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); - doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; +} // namespace ondemand +} // namespace haswell +} // namespace simdjson - // Check if at end of structural indexes (i.e. at end of batch) - if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { - error = EMPTY; - // Load another batch (if available) - while (error == EMPTY) { - batch_start = next_batch_start(); - if (batch_start >= len) { break; } - #ifdef SIMDJSON_THREADS_ENABLED - if(use_thread) { - load_from_stage1_thread(); - } else { - error = run_stage1(*parser, batch_start); - } - #else - error = run_stage1(*parser, batch_start); - #endif - /** - * Whenever we move to another window, we need to update all pointers to make - * it appear as if the input buffer started at the beginning of the window. - * - * Take this input: - * - * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] - * - * Say you process the following window... - * - * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' - * - * When you do so, the json_iterator has a pointer at the beginning of the memory region - * (pointing at the beginning of '{"z"...'. - * - * When you move to the window that starts at... - * - * '[7, 10, 9] [15, 11, 12, 13] ... - * - * then it is not sufficient to just run stage 1. You also need to re-anchor the - * json_iterator so that it believes we are starting at '[7, 10, 9]...'. - * - * Under the DOM front-end, this gets done automatically because the parser owns - * the pointer the data, and when you call stage1 and then stage2 on the same - * parser, then stage2 will run on the pointer acquired by stage1. - * - * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that - * we used. But json_iterator has no callback when stage1 is called on the parser. - * In fact, I think that the parser is unaware of json_iterator. - * - * - * So we need to re-anchor the json_iterator after each call to stage 1 so that - * all of the pointers are in sync. - */ - doc.iter = json_iterator(&buf[batch_start], parser); - doc.iter._streaming = true; - /** - * End of resync. - */ +namespace simdjson { - if (error) { continue; } // If the error was EMPTY, we may want to load another batch. - doc_index = batch_start; +simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::json_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/json_iterator-inl.h for haswell */ +/* including simdjson/generic/ondemand/json_type-inl.h for haswell: #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* begin file simdjson/generic/ondemand/json_type-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { + switch (type) { + case json_type::array: out << "array"; break; + case json_type::object: out << "object"; break; + case json_type::number: out << "number"; break; + case json_type::string: out << "string"; break; + case json_type::boolean: out << "boolean"; break; + case json_type::null: out << "null"; break; + default: SIMDJSON_UNREACHABLE(); } - } + return out; } -inline void document_stream::next_document() noexcept { - // Go to next place where depth=0 (document depth) - error = doc.iter.skip_child(0); - if (error) { return; } - // Always set depth=1 at the start of document - doc.iter._depth = 1; - // consume comma if comma separated is allowed - if (allow_comma_separated) { doc.iter.consume_character(','); } - // Resets the string buffer at the beginning, thus invalidating the strings. - doc.iter._string_buf_loc = parser->string_buf.get(); - doc.iter._root = doc.iter.position(); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { + return out << type.value(); } +#endif -inline size_t document_stream::next_batch_start() const noexcept { - return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; + + +simdjson_inline number_type number::get_number_type() const noexcept { + return type; } -inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { - // This code only updates the structural index in the parser, it does not update any json_iterator - // instance. - size_t remaining = len - _batch_start; - if (remaining <= batch_size) { - return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); - } else { - return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); - } +simdjson_inline bool number::is_uint64() const noexcept { + return get_number_type() == number_type::unsigned_integer; } -simdjson_inline size_t document_stream::iterator::current_index() const noexcept { - return stream->doc_index; +simdjson_inline uint64_t number::get_uint64() const noexcept { + return payload.unsigned_integer; } -simdjson_inline std::string_view document_stream::iterator::source() const noexcept { - auto depth = stream->doc.iter.depth(); - auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); +simdjson_inline number::operator uint64_t() const noexcept { + return get_uint64(); +} - // If at root, process the first token to determine if scalar value - if (stream->doc.iter.at_root()) { - switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { - case '{': case '[': // Depth=1 already at start of document - break; - case '}': case ']': - depth--; - break; - default: // Scalar value document - // TODO: We could remove trailing whitespaces - // This returns a string spanning from start of value to the beginning of the next document (excluded) - { - auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; - // normally the length would be next_index - current_index() - 1, except for the last document - size_t svlen = next_index - current_index(); - const char *start = reinterpret_cast(stream->buf) + current_index(); - while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { - svlen--; - } - return std::string_view(start, svlen); - } - } - cur_struct_index++; - } +simdjson_inline bool number::is_int64() const noexcept { + return get_number_type() == number_type::signed_integer; +} - while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { - switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { - case '{': case '[': - depth++; - break; - case '}': case ']': - depth--; - break; - } - if (depth == 0) { break; } - cur_struct_index++; - } +simdjson_inline int64_t number::get_int64() const noexcept { + return payload.signed_integer; +} - return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; +simdjson_inline number::operator int64_t() const noexcept { + return get_int64(); } -inline error_code document_stream::iterator::error() const noexcept { - return stream->error; +simdjson_inline bool number::is_double() const noexcept { + return get_number_type() == number_type::floating_point_number; } -#ifdef SIMDJSON_THREADS_ENABLED +simdjson_inline double number::get_double() const noexcept { + return payload.floating_point_number; +} -inline void document_stream::load_from_stage1_thread() noexcept { - worker->finish(); - // Swap to the parser that was loaded up in the thread. Make sure the parser has - // enough memory to swap to, as well. - std::swap(stage1_thread_parser,*parser); - error = stage1_thread_error; - if (error) { return; } +simdjson_inline number::operator double() const noexcept { + return get_double(); +} - // If there's anything left, start the stage 1 thread! - if (next_batch_start() < len) { - start_stage1_thread(); +simdjson_inline double number::as_double() const noexcept { + if(is_double()) { + return payload.floating_point_number; + } + if(is_int64()) { + return double(payload.signed_integer); } + return double(payload.unsigned_integer); +} + +simdjson_inline void number::append_s64(int64_t value) noexcept { + payload.signed_integer = value; + type = number_type::signed_integer; } -inline void document_stream::start_stage1_thread() noexcept { - // we call the thread on a lambda that will update - // this->stage1_thread_error - // there is only one thread that may write to this value - // TODO this is NOT exception-safe. - this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error - size_t _next_batch_start = this->next_batch_start(); +simdjson_inline void number::append_u64(uint64_t value) noexcept { + payload.unsigned_integer = value; + type = number_type::unsigned_integer; +} - worker->run(this, & this->stage1_thread_parser, _next_batch_start); +simdjson_inline void number::append_double(double value) noexcept { + payload.floating_point_number = value; + type = number_type::floating_point_number; } -#endif // SIMDJSON_THREADS_ENABLED +simdjson_inline void number::skip_double() noexcept { + type = number_type::floating_point_number; +} } // namespace ondemand } // namespace haswell @@ -62055,588 +73218,719 @@ inline void document_stream::start_stage1_thread() noexcept { namespace simdjson { -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base(error) -{ -} -simdjson_inline simdjson_result::simdjson_result( - haswell::ondemand::document_stream &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ -} +simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::json_type &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} -} +} // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H -/* end file simdjson/generic/ondemand/document_stream-inl.h for haswell */ -/* including simdjson/generic/ondemand/field-inl.h for haswell: #include "simdjson/generic/ondemand/field-inl.h" */ -/* begin file simdjson/generic/ondemand/field-inl.h for haswell */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +/* end file simdjson/generic/ondemand/json_type-inl.h for haswell */ +/* including simdjson/generic/ondemand/logger-inl.h for haswell: #include "simdjson/generic/ondemand/logger-inl.h" */ +/* begin file simdjson/generic/ondemand/logger-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#include +#include + namespace simdjson { namespace haswell { namespace ondemand { +namespace logger { -// clang 6 does not think the default constructor can be noexcept, so we make it explicit -simdjson_inline field::field() noexcept : std::pair() {} +static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; +static constexpr const int LOG_EVENT_LEN = 20; +static constexpr const int LOG_BUFFER_LEN = 30; +static constexpr const int LOG_SMALL_BUFFER_LEN = 10; +static int log_depth = 0; // Not threadsafe. Log only. -simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept - : std::pair(key, std::forward(value)) -{ +// Helper to turn unprintable or newline characters into spaces +static inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } } -simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { - raw_json_string key; - SIMDJSON_TRY( parent_iter.field_key().get(key) ); - SIMDJSON_TRY( parent_iter.field_value() ); - return field::start(parent_iter, key); +template +static inline std::string string_format(const std::string& format, const Args&... args) +{ + SIMDJSON_PUSH_DISABLE_ALL_WARNINGS + int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; + auto size = static_cast(size_s); + if (size <= 0) return std::string(); + std::unique_ptr buf(new char[size]); + std::snprintf(buf.get(), size, format.c_str(), args...); + SIMDJSON_POP_DISABLE_WARNINGS + return std::string(buf.get(), buf.get() + size - 1); } -simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { - return field(key, parent_iter.child()); +static inline log_level get_log_level_from_env() +{ + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + char *lvl = getenv("SIMDJSON_LOG_LEVEL"); + SIMDJSON_POP_DISABLE_WARNINGS + if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } + return log_level::info; } -simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. - simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); - first.consume(); - return answer; +static inline log_level log_threshold() +{ + static log_level threshold = get_log_level_from_env(); + return threshold; } -template -simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { - std::string_view key; - SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); - receiver = key; - return SUCCESS; +static inline bool should_log(log_level level) +{ + return level >= log_threshold(); } -simdjson_inline raw_json_string field::key() const noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. - return first; +inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); } - -simdjson_inline std::string_view field::key_raw_json_token() const noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. - return std::string_view(reinterpret_cast(first.buf-1), second.iter._json_iter->token.peek(-1) - first.buf + 1); +inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "", type, detail, log_level::info); } - -simdjson_inline std::string_view field::escaped_key() const noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. - auto end_quote = second.iter._json_iter->token.peek(-1); - while(*end_quote != '"') end_quote--; - return std::string_view(reinterpret_cast(first.buf), end_quote - first.buf); +inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); } -simdjson_inline value &field::value() & noexcept { - return second; +inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "+", type, detail, log_level::info); + if (LOG_ENABLED) { log_depth++; } } - -simdjson_inline value field::value() && noexcept { - return std::forward(*this).second; +inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_line(iter, "+", type, "", delta, depth_delta, log_level::info); + if (LOG_ENABLED) { log_depth++; } } -} // namespace ondemand -} // namespace haswell -} // namespace simdjson +inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + if (LOG_ENABLED) { log_depth--; } + log_line(iter, "-", type, "", delta, depth_delta, log_level::info); +} -namespace simdjson { +inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); +} +inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { + log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); +} -simdjson_inline simdjson_result::simdjson_result( - haswell::ondemand::field &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ +inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_event(iter.json_iter(), type, detail, delta, depth_delta); } -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base(error) -{ + +inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_value(iter.json_iter(), type, detail, delta, depth_delta); } -simdjson_inline simdjson_result simdjson_result::key() noexcept { - if (error()) { return error(); } - return first.key(); +inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_start_value(iter.json_iter(), type, delta, depth_delta); } -simdjson_inline simdjson_result simdjson_result::key_raw_json_token() noexcept { - if (error()) { return error(); } - return first.key_raw_json_token(); +inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_end_value(iter.json_iter(), type, delta, depth_delta); } -simdjson_inline simdjson_result simdjson_result::escaped_key() noexcept { - if (error()) { return error(); } - return first.escaped_key(); +inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_error(iter.json_iter(), error, detail, delta, depth_delta); } -simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.unescaped_key(allow_replacement); +inline void log_headers() noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(log_level::info))) { + // Technically a static variable is not thread-safe, but if you are using threads and logging... well... + static bool displayed_hint{false}; + log_depth = 0; + printf("\n"); + if (!displayed_hint) { + // We only print this helpful header once. + printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); + printf("# +array says 'this is where we were when we discovered the start array'\n"); + printf( + "# -array says 'this is where we were when we ended the array'\n"); + printf("# skip says 'this is a structural or value I am skipping'\n"); + printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); + printf("#\n"); + printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); + printf("# in addition to the depth being displayed.\n"); + printf("#\n"); + printf("# Every token in the document has a single depth determined by the tokens before it,\n"); + printf("# and is not affected by what the token actually is.\n"); + printf("#\n"); + printf("# Not all structural elements are presented as tokens in the logs.\n"); + printf("#\n"); + printf("# We never give control to the user within an empty array or an empty object.\n"); + printf("#\n"); + printf("# Inside an array, having a depth greater than the array's depth means that\n"); + printf("# we are pointing inside a value.\n"); + printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); + printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); + displayed_hint = true; + } + printf("\n"); + printf("| %-*s ", LOG_EVENT_LEN, "Event"); + printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); + printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); + // printf("| %-*s ", 5, "Next#"); + printf("| %-*s ", 5, "Depth"); + printf("| Detail "); + printf("|\n"); + + printf("|%.*s", LOG_EVENT_LEN + 2, DASHES); + printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); + printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); + // printf("|%.*s", 5+2, DASHES); + printf("|%.*s", 5 + 2, DASHES); + printf("|--------"); + printf("|\n"); + fflush(stdout); + } + } } -template -simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.unescaped_key(receiver, allow_replacement); +template +inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { + log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); } -simdjson_inline simdjson_result simdjson_result::value() noexcept { - if (error()) { return error(); } - return std::move(first.value()); +template +inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(level))) { + const int indent = depth * 2; + const auto buf = iter.token.buf; + auto msg = string_format(title, std::forward(args)...); + printf("| %*s%s%-*s ", indent, "", title_prefix, + LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); + { + // Print the current structural. + printf("| "); + // Before we begin, the index might point right before the document. + // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 + if (index < iter._root) { + printf("%*s", LOG_BUFFER_LEN, ""); + } else { + auto current_structural = &buf[*index]; + for (int i = 0; i < LOG_BUFFER_LEN; i++) { + printf("%c", printable_char(current_structural[i])); + } + } + printf(" "); + } + { + // Print the next structural. + printf("| "); + auto next_structural = &buf[*(index + 1)]; + for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { + printf("%c", printable_char(next_structural[i])); + } + printf(" "); + } + // printf("| %5u ", *(index+1)); + printf("| %5i ", depth); + printf("| %6.*s ", int(detail.size()), detail.data()); + printf("|\n"); + fflush(stdout); + } + } } +} // namespace logger +} // namespace ondemand +} // namespace haswell } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H -/* end file simdjson/generic/ondemand/field-inl.h for haswell */ -/* including simdjson/generic/ondemand/json_iterator-inl.h for haswell: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/json_iterator-inl.h for haswell */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +/* end file simdjson/generic/ondemand/logger-inl.h for haswell */ +/* including simdjson/generic/ondemand/object-inl.h for haswell: #include "simdjson/generic/ondemand/object-inl.h" */ +/* begin file simdjson/generic/ondemand/object-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/jsonpathutil.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_string_builder.h" // for constevalutil::fixed_string */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { namespace ondemand { -simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept - : token(std::forward(other.token)), - parser{other.parser}, - _string_buf_loc{other._string_buf_loc}, - error{other.error}, - _depth{other._depth}, - _root{other._root}, - _streaming{other._streaming} -{ - other.parser = nullptr; -} -simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { - token = other.token; - parser = other.parser; - _string_buf_loc = other._string_buf_loc; - error = other.error; - _depth = other._depth; - _root = other._root; - _streaming = other._streaming; - other.parser = nullptr; - return *this; +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); } - -simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept - : token(buf, &_parser->implementation->structural_indexes[0]), - parser{_parser}, - _string_buf_loc{parser->string_buf.get()}, - _depth{1}, - _root{parser->implementation->structural_indexes.get()}, - _streaming{false} - -{ - logger::log_headers(); -#if SIMDJSON_CHECK_EOF - assert_more_tokens(); -#endif +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); } - -#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept - : token(buf, &_parser->implementation->structural_indexes[0]), - parser{_parser}, - _string_buf_loc{parser->string_buf.get()}, - _depth{1}, - _root{parser->implementation->structural_indexes.get()}, - _streaming{streaming} - -{ - logger::log_headers(); -#if SIMDJSON_CHECK_EOF - assert_more_tokens(); -#endif +simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { + return find_field_unordered(key); } -#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON - -inline void json_iterator::rewind() noexcept { - token.set_position( root_position() ); - logger::log_headers(); // We start again - _string_buf_loc = parser->string_buf.get(); - _depth = 1; +simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { + return std::forward(*this).find_field_unordered(key); } - -inline bool json_iterator::balanced() const noexcept { - token_iterator ti(token); - int32_t count{0}; - ti.set_position( root_position() ); - while(ti.peek() <= peek_last()) { - switch (*ti.return_current_and_advance()) - { - case '[': case '{': - count++; - break; - case ']': case '}': - count--; - break; - default: - break; - } +simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; } - return count == 0; + return value(iter.child()); } - - -// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller -// relating depth and parent_depth, which is a desired effect. The warning does not show up if the -// skip_child() function is not marked inline). -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { - if (depth() <= parent_depth) { return SUCCESS; } - switch (*return_current_and_advance()) { - // TODO consider whether matching braces is a requirement: if non-matching braces indicates - // *missing* braces, then future lookups are not in the object/arrays they think they are, - // violating the rule "validate enough structure that the user can be confident they are - // looking at the right values." - // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth - - // For the first open array/object in a value, we've already incremented depth, so keep it the same - // We never stop at colon, but if we did, it wouldn't affect depth - case '[': case '{': case ':': - logger::log_start_value(*this, "skip"); - break; - // If there is a comma, we have just finished a value in an array/object, and need to get back in - case ',': - logger::log_value(*this, "skip"); - break; - // ] or } means we just finished a value and need to jump out of the array/object - case ']': case '}': - logger::log_end_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } -#if SIMDJSON_CHECK_EOF - // If there are no more tokens, the parent is incomplete. - if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } -#endif // SIMDJSON_CHECK_EOF - break; - case '"': - if(*peek() == ':') { - // We are at a key!!! - // This might happen if you just started an object and you skip it immediately. - // Performance note: it would be nice to get rid of this check as it is somewhat - // expensive. - // https://github.com/simdjson/simdjson/issues/1742 - logger::log_value(*this, "key"); - return_current_and_advance(); // eat up the ':' - break; // important!!! - } - simdjson_fallthrough; - // Anything else must be a scalar value - default: - // For the first scalar, we will have incremented depth already, so we decrement it here. - logger::log_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } - break; - } - - // Now that we've considered the first value, we only increment/decrement for arrays/objects - while (position() < end_position()) { - switch (*return_current_and_advance()) { - case '[': case '{': - logger::log_start_value(*this, "skip"); - _depth++; - break; - // TODO consider whether matching braces is a requirement: if non-matching braces indicates - // *missing* braces, then future lookups are not in the object/arrays they think they are, - // violating the rule "validate enough structure that the user can be confident they are - // looking at the right values." - // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth - case ']': case '}': - logger::log_end_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } - break; - default: - logger::log_value(*this, "skip", ""); - break; - } +simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; } - - return report_error(TAPE_ERROR, "not enough close braces"); + return value(iter.child()); } -SIMDJSON_POP_DISABLE_WARNINGS - -simdjson_inline bool json_iterator::at_root() const noexcept { - return position() == root_position(); +simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_object().error() ); + return object(iter); } - -simdjson_inline bool json_iterator::is_single_token() const noexcept { - return parser->implementation->n_structural_indexes == 1; +simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_root_object().error() ); + return object(iter); } - -simdjson_inline bool json_iterator::streaming() const noexcept { - return _streaming; +simdjson_warn_unused simdjson_inline error_code object::consume() noexcept { + if(iter.is_at_key()) { + /** + * whenever you are pointing at a key, calling skip_child() is + * unsafe because you will hit a string and you will assume that + * it is string value, and this mistake will lead you to make bad + * depth computation. + */ + /** + * We want to 'consume' the key. We could really + * just do _json_iter->return_current_and_advance(); at this + * point, but, for clarity, we will use the high-level API to + * eat the key. We assume that the compiler optimizes away + * most of the work. + */ + simdjson_unused raw_json_string actual_key; + auto error = iter.field_key().get(actual_key); + if (error) { iter.abandon(); return error; }; + // Let us move to the value while we are at it. + if ((error = iter.field_value())) { iter.abandon(); return error; } + } + auto error_skip = iter.json_iter().skip_child(iter.depth()-1); + if(error_skip) { iter.abandon(); } + return error_skip; } -simdjson_inline token_position json_iterator::root_position() const noexcept { - return _root; +simdjson_inline simdjson_result object::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + const uint8_t * final_point{iter._json_iter->peek()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } -simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { - SIMDJSON_ASSUME( _depth == 1 ); +simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.started_object().error() ); + return object(iter); } -simdjson_inline void json_iterator::assert_at_root() const noexcept { - SIMDJSON_ASSUME( _depth == 1 ); -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument - // has side effects that will be discarded. - SIMDJSON_ASSUME( token.position() == _root ); -#endif +simdjson_inline object object::resume(const value_iterator &iter) noexcept { + return iter; } -simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { - assert_valid_position(token._position + required_tokens - 1); +simdjson_inline object::object(const value_iterator &_iter) noexcept + : iter{_iter} +{ } -simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); - SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +simdjson_inline simdjson_result object::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } #endif + return object_iterator(iter); } - -simdjson_inline bool json_iterator::at_end() const noexcept { - return position() == end_position(); +simdjson_inline simdjson_result object::end() noexcept { + return object_iterator(iter); } -simdjson_inline token_position json_iterator::end_position() const noexcept { - uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; - return &parser->implementation->structural_indexes[n_structural_indexes]; + +inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result child; + + // If there is an escape character in the key, unescape it and then get the child. + size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = find_field(unescaped); // Take note find_field does not unescape keys when matching + } else { + child = find_field(key); + } + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; } -inline std::string json_iterator::to_string() const noexcept { - if( !is_alive() ) { return "dead json_iterator instance"; } - const char * current_structural = reinterpret_cast(token.peek()); - return std::string("json_iterator [ depth : ") + std::to_string(_depth) - + std::string(", structural : '") + std::string(current_structural,1) - + std::string("', offset : ") + std::to_string(token.current_offset()) - + std::string("', error : ") + error_message(error) - + std::string(" ]"); +inline simdjson_result object::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { + return INVALID_JSON_POINTER; + } + return at_pointer(json_pointer); } -inline simdjson_result json_iterator::current_location() const noexcept { - if (!is_alive()) { // Unrecoverable error - if (!at_root()) { - return reinterpret_cast(token.peek(-1)); +inline simdjson_result> object::at_path_with_wildcard(std::string_view json_path) noexcept { + std::vector result; + + auto result_pair = get_next_key_and_json_path(json_path); + std::string_view key = result_pair.first; + std::string_view remaining_path = result_pair.second; + // Handle when its the case for wildcard + if (key == "*") { + // Loop through each field in the object + for (auto field : *this) { + value val; + SIMDJSON_TRY(field.value().get(val)); + + if (remaining_path.empty()) { + result.push_back(std::move(val)); + } else { + auto nested_result = val.at_path_with_wildcard(remaining_path); + + if (nested_result.error()) { + return nested_result.error(); + } + // Extract and append all nested matches to our result + std::vector nested_vec; + SIMDJSON_TRY(std::move(nested_result).get(nested_vec)); + + result.insert(result.end(), + std::make_move_iterator(nested_vec.begin()), + std::make_move_iterator(nested_vec.end())); + } + } + return result; + } else { + value val; + SIMDJSON_TRY(find_field(key).get(val)); + + if (remaining_path.empty()) { + result.push_back(std::move(val)); + return result; } else { - return reinterpret_cast(token.peek()); + return val.at_path_with_wildcard(remaining_path); } } - if (at_end()) { - return OUT_OF_BOUNDS; - } - return reinterpret_cast(token.peek()); } -simdjson_inline bool json_iterator::is_alive() const noexcept { - return parser; +simdjson_inline simdjson_result object::count_fields() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the object after counting the number of elements. + iter.reset_object(); + return count; } -simdjson_inline void json_iterator::abandon() noexcept { - parser = nullptr; - _depth = 0; +simdjson_inline simdjson_result object::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_object().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; } -simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { -#if SIMDJSON_CHECK_EOF - assert_more_tokens(); -#endif // SIMDJSON_CHECK_EOF - return token.return_current_and_advance(); +simdjson_inline simdjson_result object::reset() & noexcept { + return iter.reset_object(); } -simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { - // deliberately done without safety guard: - return token.peek(); -} +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION -simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_more_tokens(delta+1); -#endif // SIMDJSON_CHECK_EOF - return token.peek(delta); -} +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code object::extract_into(T& out) & noexcept { + // Helper to check if a field name matches any of the requested fields + auto should_extract = [](std::string_view field_name) constexpr -> bool { + return ((FieldNames.view() == field_name) || ...); + }; -simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_more_tokens(delta+1); -#endif // #if SIMDJSON_CHECK_EOF - return token.peek_length(delta); -} + // Iterate through all members of T using reflection + template for (constexpr auto mem : std::define_static_array( + std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + + // Only extract this field if it's in our list of requested fields + if constexpr (should_extract(key)) { + // Try to find and extract the field + if constexpr (concepts::optional_type) { + // For optional fields, it's ok if they're missing + auto field_result = find_field_unordered(key); + if (!field_result.error()) { + auto error = field_result.get(out.[:mem:]); + if (error && error != NO_SUCH_FIELD) { + return error; + } + } else if (field_result.error() != NO_SUCH_FIELD) { + return field_result.error(); + } else { + out.[:mem:].reset(); + } + } else { + // For required fields (in the requested list), fail if missing + SIMDJSON_TRY((*this)[key].get(out.[:mem:])); + } + } + } + }; -simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { - // todo: currently we require end-of-string buffering, but the following - // assert_valid_position should be turned on if/when we lift that condition. - // assert_valid_position(position); - // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF - // is ON by default, we have no choice but to disable it for real with a comment. - return token.peek(position); + return SUCCESS; } -simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_valid_position(position); -#endif // SIMDJSON_CHECK_EOF - return token.peek_length(position); +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); } -simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_valid_position(position); -#endif // SIMDJSON_CHECK_EOF - return token.peek_root_length(position); +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); } - -simdjson_inline token_position json_iterator::last_position() const noexcept { - // The following line fails under some compilers... - // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); - // since it has side-effects. - uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; - SIMDJSON_ASSUME(n_structural_indexes > 0); - return &parser->implementation->structural_indexes[n_structural_indexes - 1]; +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); } -simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { - return token.peek(last_position()); +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field_unordered(key); } - -simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { - SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); - SIMDJSON_ASSUME(_depth == parent_depth + 1); - _depth = parent_depth; +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; } - -simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { - SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); - SIMDJSON_ASSUME(_depth == child_depth - 1); - _depth = child_depth; +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first)[key]; } - -simdjson_inline depth_t json_iterator::depth() const noexcept { - return _depth; +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field(key); } -simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { - return _string_buf_loc; +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); } -simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { - SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); - logger::log_error(*this, message); - error = _error; - return error; +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); } -simdjson_inline token_position json_iterator::position() const noexcept { - return token.position(); +simdjson_inline simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path_with_wildcard(json_path); } -simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { -#if SIMDJSON_DEVELOPMENT_CHECKS - auto result = parser->unescape(in, _string_buf_loc, allow_replacement); - SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); - return result; -#else - return parser->unescape(in, _string_buf_loc, allow_replacement); -#endif +inline simdjson_result simdjson_result::reset() noexcept { + if (error()) { return error(); } + return first.reset(); } -simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { -#if SIMDJSON_DEVELOPMENT_CHECKS - auto result = parser->unescape_wobbly(in, _string_buf_loc); - SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); - return result; -#else - return parser->unescape_wobbly(in, _string_buf_loc); -#endif +inline simdjson_result simdjson_result::is_empty() noexcept { + if (error()) { return error(); } + return first.is_empty(); } -simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { - SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); - SIMDJSON_ASSUME(_depth == child_depth - 1); -#if SIMDJSON_DEVELOPMENT_CHECKS -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); - SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); -#endif -#endif - token.set_position(position); - _depth = child_depth; +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); } -simdjson_inline error_code json_iterator::consume_character(char c) noexcept { - if (*peek() == c) { - return_current_and_advance(); - return SUCCESS; - } - return TAPE_ERROR; +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); } -#if SIMDJSON_DEVELOPMENT_CHECKS +} // namespace simdjson -simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { - SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); - return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; -} +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +/* end file simdjson/generic/ondemand/object-inl.h for haswell */ +/* including simdjson/generic/ondemand/object_iterator-inl.h for haswell: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/object_iterator-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H -simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { - SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); - if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } -} +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#endif +namespace simdjson { +namespace haswell { +namespace ondemand { +// +// object_iterator +// -simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { - SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); - logger::log_error(*this, message); - return _error; +simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result object_iterator::operator*() noexcept { + error_code error = iter.error(); + if (error) { iter.abandon(); return error; } + auto result = field::start(iter); + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (result.error()) { iter.abandon(); } + return result; +} +simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { + return iter.is_open(); } +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline object_iterator &object_iterator::operator++() noexcept { + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error -simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { - // This function is not expected to be called in performance-sensitive settings. - // Let us guard against silly cases: - if((N < max_len) || (N == 0)) { return false; } - // Copy to the buffer. - std::memcpy(tmpbuf, json, max_len); - if(N > max_len) { // We pad whatever remains with ' '. - std::memset(tmpbuf + max_len, ' ', N - max_len); - } - return true; + simdjson_unused error_code error; + if ((error = iter.skip_child() )) { return *this; } + + simdjson_unused bool has_value; + if ((error = iter.has_next_field().get(has_value) )) { return *this; }; + return *this; } +SIMDJSON_POP_DISABLE_WARNINGS + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the object is first found and the iterator is just past the {. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the , or } before the next value. In this state, +// depth == iter.depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter.depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the object iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an +// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter.depth == depth, and at_start == false. +// +// Errors that occur while reading a field to give to the user (such as when the key is not a +// string or the field is missing a colon) are yielded immediately. Depth is then decremented, +// moving to the Finished state without transitioning through an Error state at all. +// +// ## Terminal State +// +// The terminal state has iter.depth < depth. at_start is always false. +// +// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. +// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. +// } // namespace ondemand } // namespace haswell @@ -62644,1516 +73938,1923 @@ simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const ui namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::json_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result( + haswell::ondemand::object_iterator &&value +) noexcept + : implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +// Checks for ']' and ',' +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++first; + return *this; +} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/json_iterator-inl.h for haswell */ -/* including simdjson/generic/ondemand/json_type-inl.h for haswell: #include "simdjson/generic/ondemand/json_type-inl.h" */ -/* begin file simdjson/generic/ondemand/json_type-inl.h for haswell */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/object_iterator-inl.h for haswell */ +/* including simdjson/generic/ondemand/parser-inl.h for haswell: #include "simdjson/generic/ondemand/parser-inl.h" */ +/* begin file simdjson/generic/ondemand/parser-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { namespace ondemand { -inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { - switch (type) { - case json_type::array: out << "array"; break; - case json_type::object: out << "object"; break; - case json_type::number: out << "number"; break; - case json_type::string: out << "string"; break; - case json_type::boolean: out << "boolean"; break; - case json_type::null: out << "null"; break; - default: SIMDJSON_UNREACHABLE(); - } - return out; +simdjson_inline parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity} { } -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { - return out << type.value(); +simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { + if (new_capacity > max_capacity()) { return CAPACITY; } + if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + + // string_capacity copied from document::allocate + _capacity = 0; + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); +#if SIMDJSON_DEVELOPMENT_CHECKS + start_positions.reset(new (std::nothrow) token_position[new_max_depth]); +#endif + if (implementation) { + SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); + SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + } else { + SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); + } + _capacity = new_capacity; + _max_depth = new_max_depth; + return SUCCESS; +} +#if SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { + return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); } #endif +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } + json.remove_utf8_bom(); -simdjson_inline number_type number::get_number_type() const noexcept { - return type; + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return document::start({ reinterpret_cast(json.data()), this }); } -simdjson_inline bool number::is_uint64() const noexcept { - return get_number_type() == number_type::unsigned_integer; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); + if (err) { + if (err != UNCLOSED_STRING) + return err; + } + return document::start({ reinterpret_cast(json.data()), this, true }); } +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -simdjson_inline uint64_t number::get_uint64() const noexcept { - return payload.unsigned_integer; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); } -simdjson_inline number::operator uint64_t() const noexcept { - return get_uint64(); +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); } -simdjson_inline bool number::is_int64() const noexcept { - return get_number_type() == number_type::signed_integer; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { + return iterate(padded_string_view(json, allocated)); } -simdjson_inline int64_t number::get_int64() const noexcept { - return payload.signed_integer; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { + return iterate(pad_with_reserve(json)); } -simdjson_inline number::operator int64_t() const noexcept { - return get_int64(); +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { + return iterate(padded_string_view(json)); } -simdjson_inline bool number::is_double() const noexcept { - return get_number_type() == number_type::floating_point_number; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + padded_string_view json = result.value_unsafe(); + return iterate(json); } -simdjson_inline double number::get_double() const noexcept { - return payload.floating_point_number; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + const padded_string &json = result.value_unsafe(); + return iterate(json); } -simdjson_inline number::operator double() const noexcept { - return get_double(); +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length()) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return json_iterator(reinterpret_cast(json.data()), this); } -simdjson_inline double number::as_double() const noexcept { - if(is_double()) { - return payload.floating_point_number; +inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { + buf += 3; + len -= 3; } - if(is_int64()) { - return double(payload.signed_integer); + if(allow_comma_separated && batch_size < len) { batch_size = len; } + return document_stream(*this, buf, len, batch_size, allow_comma_separated); +} + +inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. + return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(padded_string_view s, size_t batch_size, bool allow_comma_separated) noexcept { + if (!s.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(pad(s), batch_size, allow_comma_separated); +} +simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { + return _capacity; +} +simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; +} +simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { + return _max_depth; +} + +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; } - return double(payload.unsigned_integer); } -simdjson_inline void number::append_s64(int64_t value) noexcept { - payload.signed_integer = value; - type = number_type::signed_integer; +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { + uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; } -simdjson_inline void number::append_u64(uint64_t value) noexcept { - payload.unsigned_integer = value; - type = number_type::unsigned_integer; +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { + uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; } -simdjson_inline void number::append_double(double value) noexcept { - payload.floating_point_number = value; - type = number_type::floating_point_number; +simdjson_inline simdjson_warn_unused ondemand::parser& parser::get_parser() { + return *parser::get_parser_instance(); } -simdjson_inline void number::skip_double() noexcept { - type = number_type::floating_point_number; +simdjson_inline bool release_parser() { + auto &parser_instance = parser::get_threadlocal_parser_if_exists(); + if (parser_instance) { + parser_instance.reset(); + return true; + } + return false; +} + +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_parser_instance() { + std::unique_ptr& parser_instance = get_threadlocal_parser_if_exists(); + if (!parser_instance) { + parser_instance.reset(new ondemand::parser()); + } + return parser_instance; } +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_threadlocal_parser_if_exists() { + // @the-moisrex points out that this could be implemented with std::optional (C++17). + thread_local std::unique_ptr parser_instance = nullptr; + return parser_instance; +} + + } // namespace ondemand } // namespace haswell } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::json_type &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H -/* end file simdjson/generic/ondemand/json_type-inl.h for haswell */ -/* including simdjson/generic/ondemand/logger-inl.h for haswell: #include "simdjson/generic/ondemand/logger-inl.h" */ -/* begin file simdjson/generic/ondemand/logger-inl.h for haswell */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H +/* end file simdjson/generic/ondemand/parser-inl.h for haswell */ +/* including simdjson/generic/ondemand/raw_json_string-inl.h for haswell: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include -#include - namespace simdjson { + namespace haswell { namespace ondemand { -namespace logger { - -static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; -static constexpr const int LOG_EVENT_LEN = 20; -static constexpr const int LOG_BUFFER_LEN = 30; -static constexpr const int LOG_SMALL_BUFFER_LEN = 10; -static int log_depth = 0; // Not threadsafe. Log only. - -// Helper to turn unprintable or newline characters into spaces -static inline char printable_char(char c) { - if (c >= 0x20) { - return c; - } else { - return ' '; - } -} - -template -static inline std::string string_format(const std::string& format, const Args&... args) -{ - SIMDJSON_PUSH_DISABLE_ALL_WARNINGS - int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; - auto size = static_cast(size_s); - if (size <= 0) return std::string(); - std::unique_ptr buf(new char[size]); - std::snprintf(buf.get(), size, format.c_str(), args...); - SIMDJSON_POP_DISABLE_WARNINGS - return std::string(buf.get(), buf.get() + size - 1); -} - -static inline log_level get_log_level_from_env() -{ - SIMDJSON_PUSH_DISABLE_WARNINGS - SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe - char *lvl = getenv("SIMDJSON_LOG_LEVEL"); - SIMDJSON_POP_DISABLE_WARNINGS - if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } - return log_level::info; -} -static inline log_level log_threshold() -{ - static log_level threshold = get_log_level_from_env(); - return threshold; -} - -static inline bool should_log(log_level level) -{ - return level >= log_threshold(); -} +simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} -inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +simdjson_inline const char * raw_json_string::raw() const noexcept { + return reinterpret_cast(buf); } -inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { - log_line(iter, index, depth, "", type, detail, log_level::info); -} -inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +simdjson_inline char raw_json_string::operator[](size_t i) const noexcept { + return reinterpret_cast(buf)[i]; } -inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { - log_line(iter, index, depth, "+", type, detail, log_level::info); - if (LOG_ENABLED) { log_depth++; } -} -inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_line(iter, "+", type, "", delta, depth_delta, log_level::info); - if (LOG_ENABLED) { log_depth++; } +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { + size_t pos{0}; + while(pos < target.size()) { + pos = target.find('"', pos); + if(pos == std::string_view::npos) { return true; } + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } + } + pos++; + } + return true; } -inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - if (LOG_ENABLED) { log_depth--; } - log_line(iter, "-", type, "", delta, depth_delta, log_level::info); +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { + size_t pos{0}; + while(target[pos]) { + const char * result = strchr(target+pos, '"'); + if(result == nullptr) { return true; } + pos = result - target; + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } + } + pos++; + } + return true; } -inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { - log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); -} -inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { - log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); -} -inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_event(iter.json_iter(), type, detail, delta, depth_delta); +simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { + // If we are going to call memcmp, then we must know something about the length of the raw_json_string. + return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); } -inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_value(iter.json_iter(), type, detail, delta, depth_delta); +simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { + // Assumptions: does not contain unescaped quote characters("), and + // the raw content is quote terminated within a valid JSON string. + if(target.size() <= SIMDJSON_PADDING) { + return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + } + const char * r{raw()}; + size_t pos{0}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; } -inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_start_value(iter.json_iter(), type, delta, depth_delta); +simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; } -inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_end_value(iter.json_iter(), type, delta, depth_delta); -} -inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { - log_error(iter.json_iter(), error, detail, delta, depth_delta); +simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { + // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; } -inline void log_headers() noexcept { - if (LOG_ENABLED) { - if (simdjson_unlikely(should_log(log_level::info))) { - // Technically a static variable is not thread-safe, but if you are using threads and logging... well... - static bool displayed_hint{false}; - log_depth = 0; - printf("\n"); - if (!displayed_hint) { - // We only print this helpful header once. - printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); - printf("# +array says 'this is where we were when we discovered the start array'\n"); - printf( - "# -array says 'this is where we were when we ended the array'\n"); - printf("# skip says 'this is a structural or value I am skipping'\n"); - printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); - printf("#\n"); - printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); - printf("# in addition to the depth being displayed.\n"); - printf("#\n"); - printf("# Every token in the document has a single depth determined by the tokens before it,\n"); - printf("# and is not affected by what the token actually is.\n"); - printf("#\n"); - printf("# Not all structural elements are presented as tokens in the logs.\n"); - printf("#\n"); - printf("# We never give control to the user within an empty array or an empty object.\n"); - printf("#\n"); - printf("# Inside an array, having a depth greater than the array's depth means that\n"); - printf("# we are pointing inside a value.\n"); - printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); - printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); - displayed_hint = true; - } - printf("\n"); - printf("| %-*s ", LOG_EVENT_LEN, "Event"); - printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); - printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); - // printf("| %-*s ", 5, "Next#"); - printf("| %-*s ", 5, "Depth"); - printf("| Detail "); - printf("|\n"); - - printf("|%.*s", LOG_EVENT_LEN + 2, DASHES); - printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); - printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); - // printf("|%.*s", 5+2, DASHES); - printf("|%.*s", 5 + 2, DASHES); - printf("|--------"); - printf("|\n"); - fflush(stdout); +simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; } } + if(r[pos] != '"') { return false; } + return true; } -template -inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { - log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { + return a.unsafe_is_equal(c); } -template -inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept { - if (LOG_ENABLED) { - if (simdjson_unlikely(should_log(level))) { - const int indent = depth * 2; - const auto buf = iter.token.buf; - auto msg = string_format(title, std::forward(args)...); - printf("| %*s%s%-*s ", indent, "", title_prefix, - LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); - { - // Print the current structural. - printf("| "); - // Before we begin, the index might point right before the document. - // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 - if (index < iter._root) { - printf("%*s", LOG_BUFFER_LEN, ""); - } else { - auto current_structural = &buf[*index]; - for (int i = 0; i < LOG_BUFFER_LEN; i++) { - printf("%c", printable_char(current_structural[i])); - } - } - printf(" "); - } - { - // Print the next structural. - printf("| "); - auto next_structural = &buf[*(index + 1)]; - for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { - printf("%c", printable_char(next_structural[i])); - } - printf(" "); - } - // printf("| %5u ", *(index+1)); - printf("| %5i ", depth); - printf("| %6.*s ", int(detail.size()), detail.data()); - printf("|\n"); - fflush(stdout); +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { + return a == c; +} + +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { + return !(a == c); +} + +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { + return !(a == c); +} + + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { + return iter.unescape(*this, allow_replacement); +} + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { + return iter.unescape_wobbly(*this); +} + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { + bool in_escape = false; + const char *s = str.raw(); + while (true) { + switch (*s) { + case '\\': in_escape = !in_escape; break; + case '"': if (in_escape) { in_escape = false; } else { return out; } break; + default: if (in_escape) { in_escape = false; } } + out << *s; + s++; } } -} // namespace logger } // namespace ondemand } // namespace haswell } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H -/* end file simdjson/generic/ondemand/logger-inl.h for haswell */ -/* including simdjson/generic/ondemand/object-inl.h for haswell: #include "simdjson/generic/ondemand/object-inl.h" */ -/* begin file simdjson/generic/ondemand/object-inl.h for haswell */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::raw() const noexcept { + if (error()) { return error(); } + return first.raw(); +} +simdjson_inline char simdjson_result::operator[](size_t i) const noexcept { + if (error()) { return error(); } + return first[i]; +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(haswell::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { + if (error()) { return error(); } + return first.unescape(iter, allow_replacement); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(haswell::ondemand::json_iterator &iter) const noexcept { + if (error()) { return error(); } + return first.unescape_wobbly(iter); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H +/* end file simdjson/generic/ondemand/raw_json_string-inl.h for haswell */ +/* including simdjson/generic/ondemand/serialization-inl.h for haswell: #include "simdjson/generic/ondemand/serialization-inl.h" */ +/* begin file simdjson/generic/ondemand/serialization-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_builder.h" */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace haswell { -namespace ondemand { -simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; - } - return value(iter.child()); -} -simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; - } - return value(iter.child()); -} -simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { - return find_field_unordered(key); +inline std::string_view trim(const std::string_view str) noexcept { + // We can almost surely do better by rolling our own find_first_not_of function. + size_t first = str.find_first_not_of(" \t\n\r"); + // If we have the empty string (just white space), then no trimming is possible, and + // we return the empty string_view. + if (std::string_view::npos == first) { return std::string_view(); } + size_t last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, (last - first + 1)); } -simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { - return std::forward(*this).find_field_unordered(key); + + +inline simdjson_result to_json_string(haswell::ondemand::document& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; - } - return value(iter.child()); + +inline simdjson_result to_json_string(haswell::ondemand::document_reference& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; + +inline simdjson_result to_json_string(haswell::ondemand::value& x) noexcept { + /** + * If we somehow receive a value that has already been consumed, + * then the following code could be in trouble. E.g., we create + * an array as needed, but if an array was already created, then + * it could be bad. + */ + using namespace haswell::ondemand; + haswell::ondemand::json_type t; + auto error = x.type().get(t); + if(error != SUCCESS) { return error; } + switch (t) + { + case json_type::array: + { + haswell::ondemand::array array; + error = x.get_array().get(array); + if(error) { return error; } + return to_json_string(array); + } + case json_type::object: + { + haswell::ondemand::object object; + error = x.get_object().get(object); + if(error) { return error; } + return to_json_string(object); + } + default: + return trim(x.raw_json_token()); } - return value(iter.child()); } -simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { - SIMDJSON_TRY( iter.start_object().error() ); - return object(iter); +inline simdjson_result to_json_string(haswell::ondemand::object& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { - SIMDJSON_TRY( iter.start_root_object().error() ); - return object(iter); + +inline simdjson_result to_json_string(haswell::ondemand::array& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline error_code object::consume() noexcept { - if(iter.is_at_key()) { - /** - * whenever you are pointing at a key, calling skip_child() is - * unsafe because you will hit a string and you will assume that - * it is string value, and this mistake will lead you to make bad - * depth computation. - */ - /** - * We want to 'consume' the key. We could really - * just do _json_iter->return_current_and_advance(); at this - * point, but, for clarity, we will use the high-level API to - * eat the key. We assume that the compiler optimizes away - * most of the work. - */ - simdjson_unused raw_json_string actual_key; - auto error = iter.field_key().get(actual_key); - if (error) { iter.abandon(); return error; }; - // Let us move to the value while we are at it. - if ((error = iter.field_value())) { iter.abandon(); return error; } - } - auto error_skip = iter.json_iter().skip_child(iter.depth()-1); - if(error_skip) { iter.abandon(); } - return error_skip; + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline simdjson_result object::raw_json() noexcept { - const uint8_t * starting_point{iter.peek_start()}; - auto error = consume(); - if(error) { return error; } - const uint8_t * final_point{iter._json_iter->peek()}; - return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { - SIMDJSON_TRY( iter.started_object().error() ); - return object(iter); +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline object object::resume(const value_iterator &iter) noexcept { - return iter; +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline object::object(const value_iterator &_iter) noexcept - : iter{_iter} -{ +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } +} // namespace simdjson -simdjson_inline simdjson_result object::begin() noexcept { -#if SIMDJSON_DEVELOPMENT_CHECKS - if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } -#endif - return object_iterator(iter); +namespace simdjson { namespace haswell { namespace ondemand { + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } } -simdjson_inline simdjson_result object::end() noexcept { - return object_iterator(iter); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } } +#endif -inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { - if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } - json_pointer = json_pointer.substr(1); - size_t slash = json_pointer.find('/'); - std::string_view key = json_pointer.substr(0, slash); - // Grab the child with the given key - simdjson_result child; +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif - // If there is an escape character in the key, unescape it and then get the child. - size_t escape = key.find('~'); - if (escape != std::string_view::npos) { - // Unescape the key - std::string unescaped(key); - do { - switch (unescaped[escape+1]) { - case '0': - unescaped.replace(escape, 2, "~"); - break; - case '1': - unescaped.replace(escape, 2, "/"); - break; - default: - return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); - } - escape = unescaped.find('~', escape+1); - } while (escape != std::string::npos); - child = find_field(unescaped); // Take note find_field does not unescape keys when matching +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); } else { - child = find_field(key); + throw simdjson::simdjson_error(error); } - if(child.error()) { - return child; // we do not continue if there was an error +} +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::document_reference& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); } - // If there is a /, we have to recurse and look up more of the path - if (slash != std::string_view::npos) { - child = child.at_pointer(json_pointer.substr(slash)); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); } - return child; } +#endif -inline simdjson_result object::at_path(std::string_view json_path) noexcept { - auto json_pointer = json_path_to_pointer_conversion(json_path); - if (json_pointer == "-1") { - return INVALID_JSON_POINTER; +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); } - return at_pointer(json_pointer); } - -simdjson_inline simdjson_result object::count_fields() & noexcept { - size_t count{0}; - // Important: we do not consume any of the values. - for(simdjson_unused auto v : *this) { count++; } - // The above loop will always succeed, but we want to report errors. - if(iter.error()) { return iter.error(); } - // We need to move back at the start because we expect users to iterate through - // the object after counting the number of elements. - iter.reset_object(); - return count; +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); } - -simdjson_inline simdjson_result object::is_empty() & noexcept { - bool is_not_empty; - auto error = iter.reset_object().get(is_not_empty); - if(error) { return error; } - return !is_not_empty; +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } } +#endif +}}} // namespace simdjson::haswell::ondemand -simdjson_inline simdjson_result object::reset() & noexcept { - return iter.reset_object(); -} +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +/* end file simdjson/generic/ondemand/serialization-inl.h for haswell */ +/* including simdjson/generic/ondemand/token_iterator-inl.h for haswell: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/token_iterator-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H -} // namespace ondemand -} // namespace haswell -} // namespace simdjson +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { +namespace haswell { +namespace ondemand { -simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::object &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline token_iterator::token_iterator( + const uint8_t *_buf, + token_position position +) noexcept : buf{_buf}, _position{position} +{ +} -simdjson_inline simdjson_result simdjson_result::begin() noexcept { - if (error()) { return error(); } - return first.begin(); +simdjson_inline uint32_t token_iterator::current_offset() const noexcept { + return *(_position); } -simdjson_inline simdjson_result simdjson_result::end() noexcept { - if (error()) { return error(); } - return first.end(); + + +simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { + return &buf[*(_position++)]; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); + +simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { + return &buf[*position]; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first).find_field_unordered(key); +simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { + return *position; } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { - if (error()) { return error(); } - return first[key]; +simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { + return *(position+1) - *position; } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first)[key]; + +simdjson_inline uint32_t token_iterator::peek_root_length(token_position position) const noexcept { + return *(position+2) - *(position) > *(position+1) - *(position) ? + *(position+1) - *(position) + : *(position+2) - *(position); } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); +simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { + return &buf[*(_position+delta)]; } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first).find_field(key); +simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { + return *(_position+delta); +} +simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { + return *(_position+delta+1) - *(_position+delta); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); +simdjson_inline token_position token_iterator::position() const noexcept { + return _position; +} +simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { + _position = target_position; } -simdjson_inline simdjson_result simdjson_result::at_path( - std::string_view json_path) noexcept { - if (error()) { - return error(); - } - return first.at_path(json_path); +simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { + return _position == other._position; +} +simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { + return _position != other._position; +} +simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { + return _position > other._position; +} +simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { + return _position >= other._position; +} +simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { + return _position < other._position; } - -inline simdjson_result simdjson_result::reset() noexcept { - if (error()) { return error(); } - return first.reset(); +simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { + return _position <= other._position; } -inline simdjson_result simdjson_result::is_empty() noexcept { - if (error()) { return error(); } - return first.is_empty(); -} +} // namespace ondemand +} // namespace haswell +} // namespace simdjson -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { - if (error()) { return error(); } - return first.count_fields(); -} +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} -simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { - if (error()) { return error(); } - return first.raw_json(); -} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H -/* end file simdjson/generic/ondemand/object-inl.h for haswell */ -/* including simdjson/generic/ondemand/object_iterator-inl.h for haswell: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/object_iterator-inl.h for haswell */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/token_iterator-inl.h for haswell */ +/* including simdjson/generic/ondemand/value_iterator-inl.h for haswell: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/value_iterator-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace haswell { namespace ondemand { -// -// object_iterator -// - -simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept - : iter{_iter} -{} +simdjson_inline value_iterator::value_iterator( + json_iterator *json_iter, + depth_t depth, + token_position start_position +) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} +{ +} -simdjson_inline simdjson_result object_iterator::operator*() noexcept { - error_code error = iter.error(); - if (error) { iter.abandon(); return error; } - auto result = field::start(iter); - // TODO this is a safety rail ... users should exit loops as soon as they receive an error. - // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. - if (result.error()) { iter.abandon(); } - return result; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_object(); } -simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { - return !(*this != other); + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_root_object(); } -simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { - return iter.is_open(); + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { + assert_at_container_start(); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + if (*_json_iter->peek() == '}') { + logger::log_value(*_json_iter, "empty object"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY(end_container()); + return false; + } + return true; } -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_inline object_iterator &object_iterator::operator++() noexcept { - // TODO this is a safety rail ... users should exit loops as soon as they receive an error. - // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. - if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // object: e.g., `{"a":2} foo }`. Users concerned with garbage content should + // call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != '}') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); + } + // If the last character is } *and* the first gibberish character is also '}' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed object. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} - simdjson_unused error_code error; - if ((error = iter.skip_child() )) { return *this; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { + auto error = check_root_object(); + if(error) { return error; } + return started_object(); +} - simdjson_unused bool has_value; - if ((error = iter.has_next_field().get(has_value) )) { return *this; }; - return *this; +simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { +#if SIMDJSON_CHECK_EOF + if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } + // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + _json_iter->ascend_to(depth()-1); + return SUCCESS; } -SIMDJSON_POP_DISABLE_WARNINGS -// -// ### Live States -// -// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is -// always SUCCESS: -// -// - Start: This is the state when the object is first found and the iterator is just past the {. -// In this state, at_start == true. -// - Next: After we hand a scalar value to the user, or an array/object which they then fully -// iterate over, the iterator is at the , or } before the next value. In this state, -// depth == iter.depth, at_start == false, and error == SUCCESS. -// - Unfinished Business: When we hand an array/object to the user which they do not fully -// iterate over, we need to finish that iteration by skipping child values until we reach the -// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. -// -// ## Error States -// -// In error states, we will yield exactly one more value before stopping. iter.depth == depth -// and at_start is always false. We decrement after yielding the error, moving to the Finished -// state. -// -// - Chained Error: When the object iterator is part of an error chain--for example, in -// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an -// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and -// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. -// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, -// we flag that as an error and treat it exactly the same as a Chained Error. In this state, -// error == TAPE_ERROR, iter.depth == depth, and at_start == false. -// -// Errors that occur while reading a field to give to the user (such as when the key is not a -// string or the field is missing a colon) are yielded immediately. Depth is then decremented, -// moving to the Finished state without transitioning through an Error state at all. -// -// ## Terminal State -// -// The terminal state has iter.depth < depth. at_start is always false. -// -// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. -// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. -// +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { + assert_at_next(); -} // namespace ondemand -} // namespace haswell -} // namespace simdjson + // It's illegal to call this unless there are more tokens: anything that ends in } or ] is + // obligated to verify there are more tokens if they are not the top level. + switch (*_json_iter->return_current_and_advance()) { + case '}': + logger::log_end_value(*_json_iter, "object"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between object fields"); + } +} -namespace simdjson { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { + error_code error; + bool has_value; + // + // Initially, the object can be in one of a few different places: + // + // 1. The start of the object, at the first field: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + if (at_first_field()) { + has_value = true; -simdjson_inline simdjson_result::simdjson_result( - haswell::ondemand::object_iterator &&value -) noexcept - : implementation_simdjson_result_base(std::forward(value)) -{ - first.iter.assert_is_valid(); -} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base({}, error) -{ -} + // + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + return false; -simdjson_inline simdjson_result simdjson_result::operator*() noexcept { - if (error()) { return error(); } - return *first; -} -// If we're iterating and there is an error, return the error once. -simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return !error(); } - return first == other.first; -} -// If we're iterating and there is an error, return the error once. -simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return error(); } - return first != other.first; -} -// Checks for ']' and ',' -simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { - // Clear the error if there is one, so we don't yield it twice - if (error()) { second = SUCCESS; return *this; } - ++first; - return *this; -} + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + if ((error = skip_child() )) { abandon(); return error; } + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + while (has_value) { + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + //if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } -} // namespace simdjson + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); // Skip the value entirely + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } -#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/object_iterator-inl.h for haswell */ -/* including simdjson/generic/ondemand/parser-inl.h for haswell: #include "simdjson/generic/ondemand/parser-inl.h" */ -/* begin file simdjson/generic/ondemand/parser-inl.h for haswell */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H + // If the loop ended, we're out of fields to look at. + return false; +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { + /** + * When find_field_unordered_raw is called, we can either be pointing at the + * first key, pointing outside (at the closing brace) or if a key was matched + * we can be either pointing right afterthe ':' right before the value (that we need skip), + * or we may have consumed the value and we might be at a comma or at the + * final brace (ready for a call to has_next_field()). + */ + error_code error; + bool has_value; -namespace simdjson { -namespace haswell { -namespace ondemand { + // First, we scan from that point to the end. + // If we don't find a match, we may loop back around, and scan from the beginning to that point. + token_position search_start = _json_iter->position(); -simdjson_inline parser::parser(size_t max_capacity) noexcept - : _max_capacity{max_capacity} { -} + // We want to know whether we need to go back to the beginning. + bool at_first = at_first_field(); + /////////////// + // Initially, the object can be in one of a few different places: + // + // 1. At the first key: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + // + if (at_first) { + has_value = true; -simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { - if (new_capacity > max_capacity()) { return CAPACITY; } - if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { - // string_capacity copied from document::allocate - _capacity = 0; - size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); - string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); #if SIMDJSON_DEVELOPMENT_CHECKS - start_positions.reset(new (std::nothrow) token_position[new_max_depth]); + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } #endif - if (implementation) { - SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); - SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + SIMDJSON_TRY(reset_object().get(has_value)); + at_first = true; + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // } else { - SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); - } - _capacity = new_capacity; - _max_depth = new_max_depth; - return SUCCESS; -} + // If someone queried a key but they not did access the value, then we are left pointing + // at the ':' and we need to move forward through the value... If the value was + // processed then skip_child() does not move the iterator (but may adjust the depth). + if ((error = skip_child() )) { abandon(); return error; } + search_start = _json_iter->position(); + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } #if SIMDJSON_DEVELOPMENT_CHECKS -simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { - return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); -} + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } #endif + } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + // After initial processing, we will be in one of two states: + // + // ``` + // // At the beginning of a field + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // At the end of the object + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // ``` + // + // Next, we find a match starting from the current position. + while (has_value) { + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + // Performance note: it maybe wasteful to rewind to the beginning when there might be + // no other query following. Indeed, it would require reskipping the whole object. + // Instead, you can just stay where you are. If there is a new query, there is always time + // to rewind. + if(at_first) { return false; } + + // If we reach the end without finding a match, search the rest of the fields starting at the + // beginning of the object. + // (We have already run through the object before, so we've already validated its structure. We + // don't check errors in this bit.) + SIMDJSON_TRY(reset_object().get(has_value)); + while (true) { + SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + error = field_value(); SIMDJSON_ASSUME(!error); - json.remove_utf8_bom(); + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } - // Allocate if needed - if (capacity() < json.length() || !string_buf) { - SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // If we reached the end of the key-value pair we started from, then we know + // that the key is not there so we return false. We are either right before + // the next comma or the final brace. + if(_json_iter->position() == search_start) { return false; } + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); + // If we make the mistake of exiting here, then we could be left pointing at a key + // in the middle of an object. That's not an allowable state. } - - // Run stage 1. - SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); - return document::start({ reinterpret_cast(json.data()), this }); + // If the loop ended, we're out of fields to look at. The program should + // never reach this point. + return false; } +SIMDJSON_POP_DISABLE_WARNINGS -#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { + assert_at_next(); - json.remove_utf8_bom(); + const uint8_t *key = _json_iter->return_current_and_advance(); + if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } + return raw_json_string(key); +} - // Allocate if needed - if (capacity() < json.length() || !string_buf) { - SIMDJSON_TRY( allocate(json.length(), max_depth()) ); - } +simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { + assert_at_next(); - // Run stage 1. - const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); - if (err) { - if (err != UNCLOSED_STRING) - return err; - } - return document::start({ reinterpret_cast(json.data()), this, true }); + if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } + _json_iter->descend_to(depth()+1); + return SUCCESS; } -#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { - return iterate(padded_string_view(json, len, allocated)); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_array(); } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { - return iterate(padded_string_view(json, len, allocated)); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_root_array(); } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { - return iterate(padded_string_view(json, allocated)); +inline std::string value_iterator::to_string() const noexcept { + auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); + if(_json_iter != nullptr) { answer += _json_iter->to_string(); } + answer += std::string(" ]"); + return answer; } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { - if(json.capacity() - json.size() < SIMDJSON_PADDING) { - json.reserve(json.size() + SIMDJSON_PADDING); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { + assert_at_container_start(); + if (*_json_iter->peek() == ']') { + logger::log_value(*_json_iter, "empty array"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY( end_container() ); + return false; } - return iterate(padded_string_view(json)); -} - -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { - return iterate(padded_string_view(json)); + _json_iter->descend_to(depth()+1); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + return true; } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { - // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception - SIMDJSON_TRY( result.error() ); - padded_string_view json = result.value_unsafe(); - return iterate(json); +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should + // also call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != ']') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); + } + // If the last character is ] *and* the first gibberish character is also ']' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed array. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { - // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception - SIMDJSON_TRY( result.error() ); - const padded_string &json = result.value_unsafe(); - return iterate(json); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { + auto error = check_root_array(); + if (error) { return error; } + return started_array(); } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } - - json.remove_utf8_bom(); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { + assert_at_next(); - // Allocate if needed - if (capacity() < json.length()) { - SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + logger::log_event(*this, "has_next_element"); + switch (*_json_iter->return_current_and_advance()) { + case ']': + logger::log_end_value(*_json_iter, "array"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + _json_iter->descend_to(depth()+1); + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between array elements"); } +} - // Run stage 1. - SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); - return json_iterator(reinterpret_cast(json.data()), this); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { + auto not_true = atomparsing::str4ncmp(json, "true"); + auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); + bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 5 : 4]); + if (error) { return incorrect_type_error("Not a boolean"); } + return simdjson_result(!not_true); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { + bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); + // if we start with 'n', we must be a null + if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } + return is_null_string; } -inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { - if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } - if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { - buf += 3; - len -= 3; - } - if(allow_comma_separated && batch_size < len) { batch_size = len; } - return document_stream(*this, buf, len, batch_size, allow_comma_separated); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { + return get_raw_json_string().unescape(json_iter(), allow_replacement); } -inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_string(allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; } -inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { + return get_raw_json_string().unescape_wobbly(json_iter()); } -inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + advance_scalar("string"); + return raw_json_string(json+1); } - -simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { - return _capacity; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { + auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; } -simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept { - return _max_capacity; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { + auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; } -simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { - return _max_depth; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { + auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; } - -simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { - if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { - _max_capacity = max_capacity; - } else { - _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; - } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { + auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; } - -simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { - uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); - if (!end) { return STRING_ERROR; } - std::string_view result(reinterpret_cast(dst), end-dst); - dst = end; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { + auto result = numberparsing::parse_double(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } return result; } - -simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { - uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); - if (!end) { return STRING_ERROR; } - std::string_view result(reinterpret_cast(dst), end-dst); - dst = end; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { + auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } return result; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { + auto result = parse_bool(peek_non_root_scalar("bool")); + if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } + return result; +} +simdjson_inline simdjson_result value_iterator::is_null() noexcept { + bool is_null_value; + SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); + if(is_null_value) { advance_non_root_scalar("null"); } + return is_null_value; +} +simdjson_inline bool value_iterator::is_negative() noexcept { + return numberparsing::is_negative(peek_non_root_scalar("numbersign")); +} +simdjson_inline bool value_iterator::is_root_negative() noexcept { + return numberparsing::is_negative(peek_root_scalar("numbersign")); +} +simdjson_inline simdjson_result value_iterator::is_integer() noexcept { + return numberparsing::is_integer(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { + return numberparsing::get_number_type(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number() noexcept { + number num; + error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); + if(error) { return error; } + return num; +} -} // namespace ondemand -} // namespace haswell -} // namespace simdjson - -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::parser &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H -/* end file simdjson/generic/ondemand/parser-inl.h for haswell */ -/* including simdjson/generic/ondemand/raw_json_string-inl.h for haswell: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ -/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for haswell */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { - -namespace haswell { -namespace ondemand { - -simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} - -simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } - +simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("is_root_integer"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + return false; // if there are more than 20 characters, it cannot be represented as an integer. + } + auto answer = numberparsing::is_integer(tmpbuf); + // If the parsing was a success, we must still check that it is + // a single scalar. Note that we parse first because of cases like '[]' where + // getting TRAILING_CONTENT is wrong. + if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } + return answer; +} -simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { - size_t pos{0}; - while(pos < target.size()) { - pos = target.find('"', pos); - if(pos == std::string_view::npos) { return true; } - if(pos != 0 && target[pos-1] != '\\') { return false; } - if(pos > 1 && target[pos-2] == '\\') { - size_t backslash_count{2}; - for(size_t i = 3; i <= pos; i++) { - if(target[pos-i] == '\\') { backslash_count++; } - else { break; } - } - if(backslash_count % 2 == 0) { return false; } +simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return number_type::big_integer; } - pos++; + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; } - return true; + auto answer = numberparsing::get_number_type(tmpbuf); + if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + return answer; } - -simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { - size_t pos{0}; - while(target[pos]) { - const char * result = strchr(target+pos, '"'); - if(result == nullptr) { return true; } - pos = result - target; - if(pos != 0 && target[pos-1] != '\\') { return false; } - if(pos > 1 && target[pos-2] == '\\') { - size_t backslash_count{2}; - for(size_t i = 3; i <= pos; i++) { - if(target[pos-i] == '\\') { backslash_count++; } - else { break; } - } - if(backslash_count % 2 == 0) { return false; } +simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + // NOTE: the current approach doesn't work for very big integer numbers containing more than 1074 digits. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return BIGINT_ERROR; } - pos++; + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; } - return true; + number num; + error_code error = numberparsing::parse_number(tmpbuf, num); + if(error) { return error; } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("number"); + return num; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { + return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_root_string(check_trailing, allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { + return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } - -simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { - // If we are going to call memcmp, then we must know something about the length of the raw_json_string. - return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + auto result = numberparsing::parse_integer(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } -simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { - // Assumptions: does not contain unescaped quote characters("), and - // the raw content is quote terminated within a valid JSON string. - if(target.size() <= SIMDJSON_PADDING) { - return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + auto result = numberparsing::parse_integer_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; } - const char * r{raw()}; - size_t pos{0}; - for(;pos < target.size();pos++) { - if(r[pos] != target[pos]) { return false; } + auto result = numberparsing::parse_double(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); } - if(r[pos] != '"') { return false; } - return true; + return result; } -simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { - const char * r{raw()}; - size_t pos{0}; - bool escaping{false}; - for(;pos < target.size();pos++) { - if(r[pos] != target[pos]) { return false; } - // if target is a compile-time constant and it is free from - // quotes, then the next part could get optimized away through - // inlining. - if((target[pos] == '"') && !escaping) { - // We have reached the end of the raw_json_string but - // the target is not done. - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; - } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; } - if(r[pos] != '"') { return false; } - return true; + auto result = numberparsing::parse_double_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; } - -simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { - // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and - // the raw content is quote terminated within a valid JSON string. - const char * r{raw()}; - size_t pos{0}; - for(;target[pos];pos++) { - if(r[pos] != target[pos]) { return false; } - } - if(r[pos] != '"') { return false; } - return true; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("bool"); + // We have a boolean if we have either "true" or "false" and the next character is either + // a structural character or whitespace. We also check that the length is correct: + // "true" and "false" are 4 and 5 characters long, respectively. + bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && + (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + return value_true; } -simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { - // Assumptions: does not contain unescaped quote characters, and - // the raw content is quote terminated within a valid JSON string. - const char * r{raw()}; - size_t pos{0}; - bool escaping{false}; - for(;target[pos];pos++) { - if(r[pos] != target[pos]) { return false; } - // if target is a compile-time constant and it is free from - // quotes, then the next part could get optimized away through - // inlining. - if((target[pos] == '"') && !escaping) { - // We have reached the end of the raw_json_string but - // the target is not done. - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; - } +simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("null"); + bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + if(result) { // we have something that looks like a null. + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("null"); + } else if (json[0] == 'n') { + return incorrect_type_error("Not a null but starts with n"); } - if(r[pos] != '"') { return false; } - return true; + return result; } -simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { - return a.unsafe_is_equal(c); -} +simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); -simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { - return a == c; + return _json_iter->skip_child(depth()); } -simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { - return !(a == c); +simdjson_inline value_iterator value_iterator::child() const noexcept { + assert_at_child(); + return { _json_iter, depth()+1, _json_iter->token.position() }; } -simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { - return !(a == c); +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is +// marked non-inline. +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline bool value_iterator::is_open() const noexcept { + return _json_iter->depth() >= depth(); } +SIMDJSON_POP_DISABLE_WARNINGS - -simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { - return iter.unescape(*this, allow_replacement); +simdjson_inline bool value_iterator::at_end() const noexcept { + return _json_iter->at_end(); } -simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { - return iter.unescape_wobbly(*this); +simdjson_inline bool value_iterator::at_start() const noexcept { + return _json_iter->token.position() == start_position(); } -simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { - bool in_escape = false; - const char *s = str.raw(); - while (true) { - switch (*s) { - case '\\': in_escape = !in_escape; break; - case '"': if (in_escape) { in_escape = false; } else { return out; } break; - default: if (in_escape) { in_escape = false; } - } - out << *s; - s++; - } +simdjson_inline bool value_iterator::at_first_field() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + return _json_iter->token.position() == start_position() + 1; } -} // namespace ondemand -} // namespace haswell -} // namespace simdjson - -namespace simdjson { +simdjson_inline void value_iterator::abandon() noexcept { + _json_iter->abandon(); +} -simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::raw_json_string &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { + return _depth; +} +simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { + return _json_iter->error; +} +simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { + return _json_iter->string_buf_loc(); +} +simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { + return *_json_iter; +} +simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { + return *_json_iter; +} -simdjson_inline simdjson_result simdjson_result::raw() const noexcept { - if (error()) { return error(); } - return first.raw(); +simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { + return _json_iter->peek(start_position()); } -simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(haswell::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { - if (error()) { return error(); } - return first.unescape(iter, allow_replacement); +simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { + return _json_iter->peek_length(start_position()); } -simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(haswell::ondemand::json_iterator &iter) const noexcept { - if (error()) { return error(); } - return first.unescape_wobbly(iter); +simdjson_inline uint32_t value_iterator::peek_root_length() const noexcept { + return _json_iter->peek_root_length(start_position()); } -} // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H -/* end file simdjson/generic/ondemand/raw_json_string-inl.h for haswell */ -/* including simdjson/generic/ondemand/serialization-inl.h for haswell: #include "simdjson/generic/ondemand/serialization-inl.h" */ -/* begin file simdjson/generic/ondemand/serialization-inl.h for haswell */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return peek_start(); } -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + return _json_iter->peek(); +} -namespace simdjson { +simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return; } -inline std::string_view trim(const std::string_view str) noexcept { - // We can almost surely do better by rolling our own find_first_not_of function. - size_t first = str.find_first_not_of(" \t\n\r"); - // If we have the empty string (just white space), then no trimming is possible, and - // we return the empty string_view. - if (std::string_view::npos == first) { return std::string_view(); } - size_t last = str.find_last_not_of(" \t\n\r"); - return str.substr(first, (last - first + 1)); + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); } +simdjson_warn_unused simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { + logger::log_start_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + const uint8_t *json; + if (!is_at_start()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + json = peek_start(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + } else { + assert_at_start(); + /** + * We should be prudent. Let us peek. If it is not the right type, we + * return an error. Only once we have determined that we have the right + * type are we allowed to advance! + */ + json = _json_iter->peek(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + _json_iter->return_current_and_advance(); + } + -inline simdjson_result to_json_string(haswell::ondemand::document& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); + return SUCCESS; } -inline simdjson_result to_json_string(haswell::ondemand::document_reference& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); -} -inline simdjson_result to_json_string(haswell::ondemand::value& x) noexcept { - /** - * If we somehow receive a value that has already been consumed, - * then the following code could be in trouble. E.g., we create - * an array as needed, but if an array was already created, then - * it could be bad. - */ - using namespace haswell::ondemand; - haswell::ondemand::json_type t; - auto error = x.type().get(t); - if(error != SUCCESS) { return error; } - switch (t) - { - case json_type::array: - { - haswell::ondemand::array array; - error = x.get_array().get(array); - if(error) { return error; } - return to_json_string(array); - } - case json_type::object: - { - haswell::ondemand::object object; - error = x.get_object().get(object); - if(error) { return error; } - return to_json_string(object); - } - default: - return trim(x.raw_json_token()); - } -} +simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } -inline simdjson_result to_json_string(haswell::ondemand::object& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); + assert_at_root(); + return _json_iter->peek(); } +simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } -inline simdjson_result to_json_string(haswell::ondemand::array& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); + assert_at_non_root_start(); + return _json_iter->peek(); } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); -} +simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); + assert_at_root(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); } +simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); + assert_at_non_root_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); +simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { + logger::log_error(*_json_iter, start_position(), depth(), message); + return INCORRECT_TYPE; } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); +simdjson_inline bool value_iterator::is_at_start() const noexcept { + return position() == start_position(); } -} // namespace simdjson - -namespace simdjson { namespace haswell { namespace ondemand { -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::value x) { - std::string_view v; - auto error = simdjson::to_json_string(x).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::value x) { - std::string_view v; - auto error = simdjson::to_json_string(x).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } +simdjson_inline bool value_iterator::is_at_key() const noexcept { + // Keys are at the same depth as the object. + // Note here that we could be safer and check that we are within an object, + // but we do not. + return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; } -#endif -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::array value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::array value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } +simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { + // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). + auto delta = position() - start_position(); + return delta == 1 || delta == 2; } -#endif -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::document& value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::document_reference& value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::document& value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } +inline void value_iterator::assert_at_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); } -#endif -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::object value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::object value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } +inline void value_iterator::assert_at_container_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); } -#endif -}}} // namespace simdjson::haswell::ondemand - -#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H -/* end file simdjson/generic/ondemand/serialization-inl.h for haswell */ -/* including simdjson/generic/ondemand/token_iterator-inl.h for haswell: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/token_iterator-inl.h for haswell */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { -namespace haswell { -namespace ondemand { -simdjson_inline token_iterator::token_iterator( - const uint8_t *_buf, - token_position position -) noexcept : buf{_buf}, _position{position} -{ +inline void value_iterator::assert_at_next() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); } -simdjson_inline uint32_t token_iterator::current_offset() const noexcept { - return *(_position); +simdjson_inline void value_iterator::move_at_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position); } - -simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { - return &buf[*(_position++)]; +simdjson_inline void value_iterator::move_at_container_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position + 1); } -simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { - return &buf[*position]; -} -simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { - return *position; -} -simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { - return *(position+1) - *position; +simdjson_inline simdjson_result value_iterator::reset_array() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_array(); } -simdjson_inline uint32_t token_iterator::peek_root_length(token_position position) const noexcept { - return *(position+2) - *(position) > *(position+1) - *(position) ? - *(position+1) - *(position) - : *(position+2) - *(position); +simdjson_inline simdjson_result value_iterator::reset_object() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_object(); } -simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { - return &buf[*(_position+delta)]; + +inline void value_iterator::assert_at_child() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); + SIMDJSON_ASSUME( _depth > 0 ); } -simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { - return *(_position+delta); + +inline void value_iterator::assert_at_root() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth == 1 ); } -simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { - return *(_position+delta+1) - *(_position+delta); + +inline void value_iterator::assert_at_non_root_start() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth > 1 ); } -simdjson_inline token_position token_iterator::position() const noexcept { - return _position; +inline void value_iterator::assert_is_valid() const noexcept { + SIMDJSON_ASSUME( _json_iter != nullptr ); } -simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { - _position = target_position; + +simdjson_inline bool value_iterator::is_valid() const noexcept { + return _json_iter != nullptr; } -simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { - return _position == other._position; +simdjson_inline simdjson_result value_iterator::type() const noexcept { + switch (*peek_start()) { + case '{': + return json_type::object; + case '[': + return json_type::array; + case '"': + return json_type::string; + case 'n': + return json_type::null; + case 't': case 'f': + return json_type::boolean; + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return json_type::number; + default: + return json_type::unknown; + } } -simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { - return _position != other._position; + +simdjson_inline token_position value_iterator::start_position() const noexcept { + return _start_position; } -simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { - return _position > other._position; + +simdjson_inline token_position value_iterator::position() const noexcept { + return _json_iter->position(); } -simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { - return _position >= other._position; + +simdjson_inline token_position value_iterator::end_position() const noexcept { + return _json_iter->end_position(); } -simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { - return _position < other._position; + +simdjson_inline token_position value_iterator::last_position() const noexcept { + return _json_iter->last_position(); } -simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { - return _position <= other._position; + +simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { + return _json_iter->report_error(error, message); } } // namespace ondemand @@ -64162,1116 +75863,2050 @@ simdjson_inline bool token_iterator::operator<=(const token_iterator &other) con namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::token_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::value_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/token_iterator-inl.h for haswell */ -/* including simdjson/generic/ondemand/value_iterator-inl.h for haswell: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/value_iterator-inl.h for haswell */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/value_iterator-inl.h for haswell */ + +// JSON builder inline definitions +/* including simdjson/generic/ondemand/json_string_builder-inl.h for haswell: #include "simdjson/generic/ondemand/json_string_builder-inl.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder-inl.h for haswell */ +/** + * This file is part of the builder API. It is temporarily in the ondemand + * directory but we will move it to a builder directory later. + */ +#include +#include +#include +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +/* + * Empirically, we have found that an inlined optimization is important for + * performance. The following macros are not ideal. We should find a better + * way to inline the code. + */ + +#if defined(__SSE2__) || defined(__x86_64__) || defined(__x86_64) || \ + (defined(_M_AMD64) || defined(_M_X64) || \ + (defined(_M_IX86_FP) && _M_IX86_FP == 2)) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#define SIMDJSON_EXPERIMENTAL_HAS_SSE2 1 +#endif +#endif + +#if defined(__aarch64__) || defined(_M_ARM64) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_NEON +#define SIMDJSON_EXPERIMENTAL_HAS_NEON 1 +#endif +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +#include +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#include +#endif + namespace simdjson { namespace haswell { -namespace ondemand { +namespace builder { + +static SIMDJSON_CONSTEXPR_LAMBDA std::array + json_quotable_character = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; -simdjson_inline value_iterator::value_iterator( - json_iterator *json_iter, - depth_t depth, - token_position start_position -) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} -{ -} +/** -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { - SIMDJSON_TRY( start_container('{', "Not an object", "object") ); - return started_object(); -} +A possible SWAR implementation of has_json_escapable_byte. It is not used +because it is slower than the current implementation. It is kept here for +reference (to show that we tried it). -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { - SIMDJSON_TRY( start_container('{', "Not an object", "object") ); - return started_root_object(); +inline bool has_json_escapable_byte(uint64_t x) { + uint64_t is_ascii = 0x8080808080808080ULL & ~x; + uint64_t xor2 = x ^ 0x0202020202020202ULL; + uint64_t lt32_or_eq34 = xor2 - 0x2121212121212121ULL; + uint64_t sub92 = x ^ 0x5C5C5C5C5C5C5C5CULL; + uint64_t eq92 = (sub92 - 0x0101010101010101ULL); + return ((lt32_or_eq34 | eq92) & is_ascii) != 0; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { - assert_at_container_start(); -#if SIMDJSON_DEVELOPMENT_CHECKS - _json_iter->set_start_position(_depth, start_position()); -#endif - if (*_json_iter->peek() == '}') { - logger::log_value(*_json_iter, "empty object"); - _json_iter->return_current_and_advance(); - end_container(); - return false; - } - return true; -} +**/ -simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { - // When in streaming mode, we cannot expect peek_last() to be the last structural element of the - // current document. It only works in the normal mode where we have indexed a single document. - // Note that adding a check for 'streaming' is not expensive since we only have at most - // one root element. - if ( ! _json_iter->streaming() ) { - // The following lines do not fully protect against garbage content within the - // object: e.g., `{"a":2} foo }`. Users concerned with garbage content should - // call `at_end()` on the document instance at the end of the processing to - // ensure that the processing has finished at the end. - // - if (*_json_iter->peek_last() != '}') { - _json_iter->abandon(); - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); - } - // If the last character is } *and* the first gibberish character is also '}' - // then on-demand could accidentally go over. So we need additional checks. - // https://github.com/simdjson/simdjson/issues/1834 - // Checking that the document is balanced requires a full scan which is potentially - // expensive, but it only happens in edge cases where the first padding character is - // a closing bracket. - if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { - _json_iter->abandon(); - // The exact error would require more work. It will typically be an unclosed object. - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); +SIMDJSON_CONSTEXPR_LAMBDA simdjson_inline bool +simple_needs_escaping(std::string_view v) { + for (char c : v) { + // a table lookup is faster than a series of comparisons + if (json_quotable_character[static_cast(c)]) { + return true; } } - return SUCCESS; + return false; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { - auto error = check_root_object(); - if(error) { return error; } - return started_object(); +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); + } + size_t i = 0; + uint8x16_t running = vdupq_n_u8(0); + uint8x16_t v34 = vdupq_n_u8(34); + uint8x16_t v92 = vdupq_n_u8(92); + + for (; i + 15 < view.size(); i += 16) { + uint8x16_t word = vld1q_u8((const uint8_t *)view.data() + i); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + if (i < view.size()) { + uint8x16_t word = + vld1q_u8((const uint8_t *)view.data() + view.length() - 16); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + return vmaxvq_u32(vreinterpretq_u32_u8(running)) != 0; +} +#elif SIMDJSON_EXPERIMENTAL_HAS_SSE2 +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); + } + size_t i = 0; + __m128i running = _mm_setzero_si128(); + for (; i + 15 < view.size(); i += 16) { + + __m128i word = + _mm_loadu_si128(reinterpret_cast(view.data() + i)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + if (i < view.size()) { + __m128i word = _mm_loadu_si128( + reinterpret_cast(view.data() + view.length() - 16)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + return _mm_movemask_epi8(running) != 0; +} +#else +simdjson_inline bool fast_needs_escaping(std::string_view view) { + return simple_needs_escaping(view); } +#endif -simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { -#if SIMDJSON_CHECK_EOF - if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } - // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } -#endif // SIMDJSON_CHECK_EOF - _json_iter->ascend_to(depth()-1); - return SUCCESS; +SIMDJSON_CONSTEXPR_LAMBDA inline size_t +find_next_json_quotable_character(const std::string_view view, + size_t location) noexcept { + + for (auto pos = view.begin() + location; pos != view.end(); ++pos) { + if (json_quotable_character[static_cast(*pos)]) { + return pos - view.begin(); + } + } + return size_t(view.size()); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { - assert_at_next(); +SIMDJSON_CONSTEXPR_LAMBDA static std::string_view control_chars[] = { + "\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", "\\u0006", + "\\u0007", "\\b", "\\t", "\\n", "\\u000b", "\\f", "\\r", + "\\u000e", "\\u000f", "\\u0010", "\\u0011", "\\u0012", "\\u0013", "\\u0014", + "\\u0015", "\\u0016", "\\u0017", "\\u0018", "\\u0019", "\\u001a", "\\u001b", + "\\u001c", "\\u001d", "\\u001e", "\\u001f"}; - // It's illegal to call this unless there are more tokens: anything that ends in } or ] is - // obligated to verify there are more tokens if they are not the top level. - switch (*_json_iter->return_current_and_advance()) { - case '}': - logger::log_end_value(*_json_iter, "object"); - SIMDJSON_TRY( end_container() ); - return false; - case ',': - return true; - default: - return report_error(TAPE_ERROR, "Missing comma between object fields"); +// All Unicode characters may be placed within the quotation marks, except for +// the characters that MUST be escaped: quotation mark, reverse solidus, and the +// control characters (U+0000 through U+001F). There are two-character sequence +// escape representations of some popular characters: +// \", \\, \b, \f, \n, \r, \t. +SIMDJSON_CONSTEXPR_LAMBDA void escape_json_char(char c, char *&out) { + if (c == '"') { + memcpy(out, "\\\"", 2); + out += 2; + } else if (c == '\\') { + memcpy(out, "\\\\", 2); + out += 2; + } else { + std::string_view v = control_chars[uint8_t(c)]; + memcpy(out, v.data(), v.size()); + out += v.size(); + } +} + +inline size_t write_string_escaped(const std::string_view input, char *out) { + size_t mysize = input.size(); + if (!fast_needs_escaping(input)) { // fast path! + memcpy(out, input.data(), input.size()); + return input.size(); + } + const char *const initout = out; + size_t location = find_next_json_quotable_character(input, 0); + memcpy(out, input.data(), location); + out += location; + escape_json_char(input[location], out); + location += 1; + while (location < mysize) { + size_t newlocation = find_next_json_quotable_character(input, location); + memcpy(out, input.data() + location, newlocation - location); + out += newlocation - location; + location = newlocation; + if (location == mysize) { + break; + } + escape_json_char(input[location], out); + location += 1; } + return out - initout; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { - error_code error; - bool has_value; - // - // Initially, the object can be in one of a few different places: - // - // 1. The start of the object, at the first field: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2, index 1) - // ``` - if (at_first_field()) { - has_value = true; +simdjson_inline string_builder::string_builder(size_t initial_capacity) + : buffer(new(std::nothrow) char[initial_capacity]), position(0), + capacity(buffer.get() != nullptr ? initial_capacity : 0), + is_valid(buffer.get() != nullptr) {} - // - // 2. When a previous search did not yield a value or the object is empty: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 0) - // { } - // ^ (depth 0, index 2) - // ``` - // - } else if (!is_open()) { -#if SIMDJSON_DEVELOPMENT_CHECKS - // If we're past the end of the object, we're being iterated out of order. - // Note: this is not perfect detection. It's possible the user is inside some other object; if so, - // this object iterator will blithely scan that object for fields. - if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } -#endif +simdjson_inline bool string_builder::capacity_check(size_t upcoming_bytes) { + // We use the convention that when is_valid is false, then the capacity and + // the position are 0. + // Most of the time, this function will return true. + if (simdjson_likely(upcoming_bytes <= capacity - position)) { + return true; + } + // check for overflow, most of the time there is no overflow + if (simdjson_likely(position + upcoming_bytes < position)) { return false; + } + // We will rarely get here. + grow_buffer((std::max)(capacity * 2, position + upcoming_bytes)); + // If the buffer allocation failed, we set is_valid to false. + return is_valid; +} - // 3. When a previous search found a field or an iterator yielded a value: - // - // ``` - // // When a field was not fully consumed (or not even touched at all) - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2) - // // When a field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // // When the last field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // ``` - // +simdjson_inline void string_builder::grow_buffer(size_t desired_capacity) { + if (!is_valid) { + return; + } + std::unique_ptr new_buffer(new (std::nothrow) char[desired_capacity]); + if (new_buffer.get() == nullptr) { + set_valid(false); + return; + } + std::memcpy(new_buffer.get(), buffer.get(), position); + buffer.swap(new_buffer); + capacity = desired_capacity; +} + +simdjson_inline void string_builder::set_valid(bool valid) noexcept { + if (!valid) { + is_valid = false; + capacity = 0; + position = 0; + buffer.reset(); } else { - if ((error = skip_child() )) { abandon(); return error; } - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } -#if SIMDJSON_DEVELOPMENT_CHECKS - if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } -#endif + is_valid = true; } - while (has_value) { - // Get the key and colon, stopping at the value. - raw_json_string actual_key; - // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes - // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. - // field_key() advances the pointer and checks that '"' is found (corresponding to a key). - // The depth is left unchanged by field_key(). - if ((error = field_key().get(actual_key) )) { abandon(); return error; }; - // field_value() will advance and check that we find a ':' separating the - // key and the value. It will also increment the depth by one. - if ((error = field_value() )) { abandon(); return error; } - // If it matches, stop and return - // We could do it this way if we wanted to allow arbitrary - // key content (including escaped quotes). - //if (actual_key.unsafe_is_equal(max_key_length, key)) { - // Instead we do the following which may trigger buffer overruns if the - // user provides an adversarial key (containing a well placed unescaped quote - // character and being longer than the number of bytes remaining in the JSON - // input). - if (actual_key.unsafe_is_equal(key)) { - logger::log_event(*this, "match", key, -2); - // If we return here, then we return while pointing at the ':' that we just checked. - return true; - } +} - // No match: skip the value and see if , or } is next - logger::log_event(*this, "no match", key, -2); - // The call to skip_child is meant to skip over the value corresponding to the key. - // After skip_child(), we are right before the next comma (',') or the final brace ('}'). - SIMDJSON_TRY( skip_child() ); // Skip the value entirely - // The has_next_field() advances the pointer and check that either ',' or '}' is found. - // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, - // then we are in error and we abort. - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +simdjson_inline size_t string_builder::size() const noexcept { + return position; +} + +simdjson_inline void string_builder::append(char c) noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = c; } +} - // If the loop ended, we're out of fields to look at. - return false; +simdjson_inline void string_builder::append_null() noexcept { + constexpr char null_literal[] = "null"; + constexpr size_t null_len = sizeof(null_literal) - 1; + if (capacity_check(null_len)) { + std::memcpy(buffer.get() + position, null_literal, null_len); + position += null_len; + } } -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { - /** - * When find_field_unordered_raw is called, we can either be pointing at the - * first key, pointing outside (at the closing brace) or if a key was matched - * we can be either pointing right afterthe ':' right before the value (that we need skip), - * or we may have consumed the value and we might be at a comma or at the - * final brace (ready for a call to has_next_field()). - */ - error_code error; - bool has_value; +simdjson_inline void string_builder::clear() noexcept { + position = 0; + // if it was invalid, we should try to repair it + if (!is_valid) { + capacity = 0; + buffer.reset(); + is_valid = true; + } +} - // First, we scan from that point to the end. - // If we don't find a match, we may loop back around, and scan from the beginning to that point. - token_position search_start = _json_iter->position(); +namespace internal { - // We want to know whether we need to go back to the beginning. - bool at_first = at_first_field(); - /////////////// - // Initially, the object can be in one of a few different places: - // - // 1. At the first key: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2, index 1) - // ``` - // - if (at_first) { - has_value = true; +template ::value>::type> +simdjson_really_inline int int_log2(number_type x) { + return 63 - leading_zeroes(uint64_t(x) | 1); +} + +simdjson_really_inline int fast_digit_count_32(uint32_t x) { + static uint64_t table[] = { + 4294967296, 8589934582, 8589934582, 8589934582, 12884901788, + 12884901788, 12884901788, 17179868184, 17179868184, 17179868184, + 21474826480, 21474826480, 21474826480, 21474826480, 25769703776, + 25769703776, 25769703776, 30063771072, 30063771072, 30063771072, + 34349738368, 34349738368, 34349738368, 34349738368, 38554705664, + 38554705664, 38554705664, 41949672960, 41949672960, 41949672960, + 42949672960, 42949672960}; + return uint32_t((x + table[int_log2(x)]) >> 32); +} + +simdjson_really_inline int fast_digit_count_64(uint64_t x) { + static uint64_t table[] = {9, + 99, + 999, + 9999, + 99999, + 999999, + 9999999, + 99999999, + 999999999, + 9999999999, + 99999999999, + 999999999999, + 9999999999999, + 99999999999999, + 999999999999999ULL, + 9999999999999999ULL, + 99999999999999999ULL, + 999999999999999999ULL, + 9999999999999999999ULL}; + int y = (19 * int_log2(x) >> 6); + y += x > table[y]; + return y + 1; +} + +template ::value>::type> +simdjson_really_inline size_t digit_count(number_type v) noexcept { + static_assert(sizeof(number_type) == 8 || sizeof(number_type) == 4 || + sizeof(number_type) == 2 || sizeof(number_type) == 1, + "We only support 8-bit, 16-bit, 32-bit and 64-bit numbers"); + SIMDJSON_IF_CONSTEXPR(sizeof(number_type) <= 4) { + return fast_digit_count_32(static_cast(v)); + } + else { + return fast_digit_count_64(static_cast(v)); + } +} +static const char decimal_table[200] = { + 0x30, 0x30, 0x30, 0x31, 0x30, 0x32, 0x30, 0x33, 0x30, 0x34, 0x30, 0x35, + 0x30, 0x36, 0x30, 0x37, 0x30, 0x38, 0x30, 0x39, 0x31, 0x30, 0x31, 0x31, + 0x31, 0x32, 0x31, 0x33, 0x31, 0x34, 0x31, 0x35, 0x31, 0x36, 0x31, 0x37, + 0x31, 0x38, 0x31, 0x39, 0x32, 0x30, 0x32, 0x31, 0x32, 0x32, 0x32, 0x33, + 0x32, 0x34, 0x32, 0x35, 0x32, 0x36, 0x32, 0x37, 0x32, 0x38, 0x32, 0x39, + 0x33, 0x30, 0x33, 0x31, 0x33, 0x32, 0x33, 0x33, 0x33, 0x34, 0x33, 0x35, + 0x33, 0x36, 0x33, 0x37, 0x33, 0x38, 0x33, 0x39, 0x34, 0x30, 0x34, 0x31, + 0x34, 0x32, 0x34, 0x33, 0x34, 0x34, 0x34, 0x35, 0x34, 0x36, 0x34, 0x37, + 0x34, 0x38, 0x34, 0x39, 0x35, 0x30, 0x35, 0x31, 0x35, 0x32, 0x35, 0x33, + 0x35, 0x34, 0x35, 0x35, 0x35, 0x36, 0x35, 0x37, 0x35, 0x38, 0x35, 0x39, + 0x36, 0x30, 0x36, 0x31, 0x36, 0x32, 0x36, 0x33, 0x36, 0x34, 0x36, 0x35, + 0x36, 0x36, 0x36, 0x37, 0x36, 0x38, 0x36, 0x39, 0x37, 0x30, 0x37, 0x31, + 0x37, 0x32, 0x37, 0x33, 0x37, 0x34, 0x37, 0x35, 0x37, 0x36, 0x37, 0x37, + 0x37, 0x38, 0x37, 0x39, 0x38, 0x30, 0x38, 0x31, 0x38, 0x32, 0x38, 0x33, + 0x38, 0x34, 0x38, 0x35, 0x38, 0x36, 0x38, 0x37, 0x38, 0x38, 0x38, 0x39, + 0x39, 0x30, 0x39, 0x31, 0x39, 0x32, 0x39, 0x33, 0x39, 0x34, 0x39, 0x35, + 0x39, 0x36, 0x39, 0x37, 0x39, 0x38, 0x39, 0x39, +}; +} // namespace internal - // 2. When a previous search did not yield a value or the object is empty: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 0) - // { } - // ^ (depth 0, index 2) - // ``` - // - } else if (!is_open()) { +template +simdjson_inline void string_builder::append(number_type v) noexcept { + static_assert(std::is_same::value || + std::is_integral::value || + std::is_floating_point::value, + "Unsupported number type"); + // If C++17 is available, we can 'if constexpr' here. + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + if (v) { + constexpr char true_literal[] = "true"; + constexpr size_t true_len = sizeof(true_literal) - 1; + if (capacity_check(true_len)) { + std::memcpy(buffer.get() + position, true_literal, true_len); + position += true_len; + } + } else { + constexpr char false_literal[] = "false"; + constexpr size_t false_len = sizeof(false_literal) - 1; + if (capacity_check(false_len)) { + std::memcpy(buffer.get() + position, false_literal, false_len); + position += false_len; + } + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_unsigned::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + unsigned_type pv = static_cast(v); + size_t dc = internal::digit_count(pv); + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100) * 2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_integral::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + bool negative = v < 0; + unsigned_type pv = static_cast(v); + if (negative) { + pv = 0 - pv; // the 0 is for Microsoft + } + size_t dc = internal::digit_count(pv); + // by always writing the minus sign, we avoid the branch. + buffer.get()[position] = '-'; + position += negative ? 1 : 0; + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100) * 2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_floating_point::value) { + constexpr size_t max_number_size = 24; + if (capacity_check(max_number_size)) { + // We could specialize for float. + char *end = simdjson::internal::to_chars(buffer.get() + position, nullptr, + double(v)); + position = end - buffer.get(); + } + } +} -#if SIMDJSON_DEVELOPMENT_CHECKS - // If we're past the end of the object, we're being iterated out of order. - // Note: this is not perfect detection. It's possible the user is inside some other object; if so, - // this object iterator will blithely scan that object for fields. - if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } -#endif - SIMDJSON_TRY(reset_object().get(has_value)); - at_first = true; - // 3. When a previous search found a field or an iterator yielded a value: - // - // ``` - // // When a field was not fully consumed (or not even touched at all) - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2) - // // When a field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // // When the last field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // ``` - // - } else { - // If someone queried a key but they not did access the value, then we are left pointing - // at the ':' and we need to move forward through the value... If the value was - // processed then skip_child() does not move the iterator (but may adjust the depth). - if ((error = skip_child() )) { abandon(); return error; } - search_start = _json_iter->position(); - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } -#if SIMDJSON_DEVELOPMENT_CHECKS - if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } -#endif +simdjson_inline void +string_builder::escape_and_append(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(6 * input.size())) { + position += write_string_escaped(input, buffer.get() + position); } +} - // After initial processing, we will be in one of two states: - // - // ``` - // // At the beginning of a field - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // // At the end of the object - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 0) - // ``` - // - // Next, we find a match starting from the current position. - while (has_value) { - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field +simdjson_inline void +string_builder::escape_and_append_with_quotes(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * input.size())) { + buffer.get()[position++] = '"'; + position += write_string_escaped(input, buffer.get() + position); + buffer.get()[position++] = '"'; + } +} - // Get the key and colon, stopping at the value. - raw_json_string actual_key; - // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes - // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. - // field_key() advances the pointer and checks that '"' is found (corresponding to a key). - // The depth is left unchanged by field_key(). - if ((error = field_key().get(actual_key) )) { abandon(); return error; }; - // field_value() will advance and check that we find a ':' separating the - // key and the value. It will also increment the depth by one. - if ((error = field_value() )) { abandon(); return error; } +simdjson_inline void +string_builder::escape_and_append_with_quotes(char input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * 1)) { + buffer.get()[position++] = '"'; + std::string_view cinput(&input, 1); + position += write_string_escaped(cinput, buffer.get() + position); + buffer.get()[position++] = '"'; + } +} - // If it matches, stop and return - // We could do it this way if we wanted to allow arbitrary - // key content (including escaped quotes). - // if (actual_key.unsafe_is_equal(max_key_length, key)) { - // Instead we do the following which may trigger buffer overruns if the - // user provides an adversarial key (containing a well placed unescaped quote - // character and being longer than the number of bytes remaining in the JSON - // input). - if (actual_key.unsafe_is_equal(key)) { - logger::log_event(*this, "match", key, -2); - // If we return here, then we return while pointing at the ':' that we just checked. - return true; - } +simdjson_inline void +string_builder::escape_and_append_with_quotes(const char *input) noexcept { + std::string_view cinput(input); + escape_and_append_with_quotes(cinput); +} +#if SIMDJSON_SUPPORTS_CONCEPTS +template +simdjson_inline void string_builder::escape_and_append_with_quotes() noexcept { + escape_and_append_with_quotes(constevalutil::string_constant::value); +} +#endif - // No match: skip the value and see if , or } is next - logger::log_event(*this, "no match", key, -2); - // The call to skip_child is meant to skip over the value corresponding to the key. - // After skip_child(), we are right before the next comma (',') or the final brace ('}'). - SIMDJSON_TRY( skip_child() ); - // The has_next_field() advances the pointer and check that either ',' or '}' is found. - // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, - // then we are in error and we abort. - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +simdjson_inline void string_builder::append_raw(const char *c) noexcept { + size_t len = std::strlen(c); + append_raw(c, len); +} + +simdjson_inline void +string_builder::append_raw(std::string_view input) noexcept { + if (capacity_check(input.size())) { + std::memcpy(buffer.get() + position, input.data(), input.size()); + position += input.size(); } - // Performance note: it maybe wasteful to rewind to the beginning when there might be - // no other query following. Indeed, it would require reskipping the whole object. - // Instead, you can just stay where you are. If there is a new query, there is always time - // to rewind. - if(at_first) { return false; } +} - // If we reach the end without finding a match, search the rest of the fields starting at the - // beginning of the object. - // (We have already run through the object before, so we've already validated its structure. We - // don't check errors in this bit.) - SIMDJSON_TRY(reset_object().get(has_value)); - while (true) { - SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field +simdjson_inline void string_builder::append_raw(const char *str, + size_t len) noexcept { + if (capacity_check(len)) { + std::memcpy(buffer.get() + position, str, len); + position += len; + } +} +#if SIMDJSON_SUPPORTS_CONCEPTS +// Support for optional types (std::optional, etc.) +template + requires(!require_custom_serialization) +simdjson_inline void string_builder::append(const T &opt) { + if (opt) { + append(*opt); + } else { + append_null(); + } +} + +template + requires(require_custom_serialization) +simdjson_inline void string_builder::append(T &&val) { + serialize(*this, std::forward(val)); +} + +template + requires(std::is_convertible::value || + std::is_same::value) +simdjson_inline void string_builder::append(const T &value) { + escape_and_append_with_quotes(value); +} +#endif - // Get the key and colon, stopping at the value. - raw_json_string actual_key; - // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes - // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. - // field_key() advances the pointer and checks that '"' is found (corresponding to a key). - // The depth is left unchanged by field_key(). - error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); - // field_value() will advance and check that we find a ':' separating the - // key and the value. It will also increment the depth by one. - error = field_value(); SIMDJSON_ASSUME(!error); +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS +// Support for range-based appending (std::ranges::view, etc.) +template + requires(!std::is_convertible::value && !require_custom_serialization) +simdjson_inline void string_builder::append(const R &range) noexcept { + auto it = std::ranges::begin(range); + auto end = std::ranges::end(range); + if constexpr (concepts::is_pair>) { + start_object(); - // If it matches, stop and return - // We could do it this way if we wanted to allow arbitrary - // key content (including escaped quotes). - // if (actual_key.unsafe_is_equal(max_key_length, key)) { - // Instead we do the following which may trigger buffer overruns if the - // user provides an adversarial key (containing a well placed unescaped quote - // character and being longer than the number of bytes remaining in the JSON - // input). - if (actual_key.unsafe_is_equal(key)) { - logger::log_event(*this, "match", key, -2); - // If we return here, then we return while pointing at the ':' that we just checked. - return true; + if (it == end) { + end_object(); + return; // Handle empty range } + // Append first item without leading comma + append_key_value(it->first, it->second); + ++it; - // No match: skip the value and see if , or } is next - logger::log_event(*this, "no match", key, -2); - // The call to skip_child is meant to skip over the value corresponding to the key. - // After skip_child(), we are right before the next comma (',') or the final brace ('}'). - SIMDJSON_TRY( skip_child() ); - // If we reached the end of the key-value pair we started from, then we know - // that the key is not there so we return false. We are either right before - // the next comma or the final brace. - if(_json_iter->position() == search_start) { return false; } - // The has_next_field() advances the pointer and check that either ',' or '}' is found. - // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, - // then we are in error and we abort. - error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); - // If we make the mistake of exiting here, then we could be left pointing at a key - // in the middle of an object. That's not an allowable state. + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append_key_value(it->first, it->second); + } + end_object(); + } else { + start_array(); + if (it == end) { + end_array(); + return; // Handle empty range + } + + // Append first item without leading comma + append(*it); + ++it; + + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append(*it); + } + end_array(); } - // If the loop ended, we're out of fields to look at. The program should - // never reach this point. - return false; } -SIMDJSON_POP_DISABLE_WARNINGS -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { - assert_at_next(); +#endif - const uint8_t *key = _json_iter->return_current_and_advance(); - if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } - return raw_json_string(key); +#if SIMDJSON_EXCEPTIONS +simdjson_inline string_builder::operator std::string() const noexcept(false) { + return std::string(operator std::string_view()); } -simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { - assert_at_next(); +simdjson_inline string_builder::operator std::string_view() const + noexcept(false) simdjson_lifetime_bound { + return view(); +} +#endif - if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } - _json_iter->descend_to(depth()+1); - return SUCCESS; +simdjson_inline simdjson_result +string_builder::view() const noexcept { + if (!is_valid) { + return simdjson::OUT_OF_CAPACITY; + } + return std::string_view(buffer.get(), position); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { - SIMDJSON_TRY( start_container('[', "Not an array", "array") ); - return started_array(); +simdjson_inline simdjson_result string_builder::c_str() noexcept { + if (capacity_check(1)) { + buffer.get()[position] = '\0'; + return buffer.get(); + } + return simdjson::OUT_OF_CAPACITY; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { - SIMDJSON_TRY( start_container('[', "Not an array", "array") ); - return started_root_array(); +simdjson_inline bool string_builder::validate_unicode() const noexcept { + return simdjson::validate_utf8(buffer.get(), position); } -inline std::string value_iterator::to_string() const noexcept { - auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); - if(_json_iter != nullptr) { answer += _json_iter->to_string(); } - answer += std::string(" ]"); - return answer; +simdjson_inline void string_builder::start_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '{'; + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { - assert_at_container_start(); - if (*_json_iter->peek() == ']') { - logger::log_value(*_json_iter, "empty array"); - _json_iter->return_current_and_advance(); - SIMDJSON_TRY( end_container() ); - return false; +simdjson_inline void string_builder::end_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '}'; } - _json_iter->descend_to(depth()+1); -#if SIMDJSON_DEVELOPMENT_CHECKS - _json_iter->set_start_position(_depth, start_position()); -#endif - return true; } -simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { - // When in streaming mode, we cannot expect peek_last() to be the last structural element of the - // current document. It only works in the normal mode where we have indexed a single document. - // Note that adding a check for 'streaming' is not expensive since we only have at most - // one root element. - if ( ! _json_iter->streaming() ) { - // The following lines do not fully protect against garbage content within the - // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should - // also call `at_end()` on the document instance at the end of the processing to - // ensure that the processing has finished at the end. - // - if (*_json_iter->peek_last() != ']') { - _json_iter->abandon(); - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); - } - // If the last character is ] *and* the first gibberish character is also ']' - // then on-demand could accidentally go over. So we need additional checks. - // https://github.com/simdjson/simdjson/issues/1834 - // Checking that the document is balanced requires a full scan which is potentially - // expensive, but it only happens in edge cases where the first padding character is - // a closing bracket. - if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { - _json_iter->abandon(); - // The exact error would require more work. It will typically be an unclosed array. - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); - } +simdjson_inline void string_builder::start_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '['; } - return SUCCESS; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { - auto error = check_root_array(); - if (error) { return error; } - return started_array(); +simdjson_inline void string_builder::end_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ']'; + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { - assert_at_next(); +simdjson_inline void string_builder::append_comma() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ','; + } +} - logger::log_event(*this, "has_next_element"); - switch (*_json_iter->return_current_and_advance()) { - case ']': - logger::log_end_value(*_json_iter, "array"); - SIMDJSON_TRY( end_container() ); - return false; - case ',': - _json_iter->descend_to(depth()+1); - return true; - default: - return report_error(TAPE_ERROR, "Missing comma between array elements"); +simdjson_inline void string_builder::append_colon() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ':'; } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { - auto not_true = atomparsing::str4ncmp(json, "true"); - auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); - bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 5 : 4]); - if (error) { return incorrect_type_error("Not a boolean"); } - return simdjson_result(!not_true); +template +simdjson_inline void +string_builder::append_key_value(key_type key, value_type value) noexcept { + static_assert(std::is_same::value || + std::is_convertible::value, + "Unsupported key type"); + escape_and_append_with_quotes(key); + append_colon(); + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + append_null(); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR( + std::is_convertible::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else { + append(value); + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { - bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); - // if we start with 'n', we must be a null - if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } - return is_null_string; + +#if SIMDJSON_SUPPORTS_CONCEPTS +template +simdjson_inline void +string_builder::append_key_value(value_type value) noexcept { + escape_and_append_with_quotes(); + append_colon(); + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + append_null(); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR( + std::is_convertible::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else { + append(value); + } } +#endif -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { - return get_raw_json_string().unescape(json_iter(), allow_replacement); +} // namespace builder +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_STRING_BUILDER_INL_H +/* end file simdjson/generic/ondemand/json_string_builder-inl.h for haswell */ +/* including simdjson/generic/ondemand/json_builder.h for haswell: #include "simdjson/generic/ondemand/json_builder.h" */ +/* begin file simdjson/generic/ondemand/json_builder.h for haswell */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/concepts.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#if SIMDJSON_STATIC_REFLECTION + +#include +#include +#include +#include +#include +#include +#include +#include +// #include // for std::define_static_string - header not available yet + +namespace simdjson { +namespace haswell { +namespace builder { + +template + requires(concepts::container_but_not_string && !require_custom_serialization) +constexpr void atom(string_builder &b, const T &t) { + auto it = t.begin(); + auto end = t.end(); + if (it == end) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, *it); + ++it; + for (; it != end; ++it) { + b.append(','); + atom(b, *it); + } + b.append(']'); } -template -simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept { - std::string_view content; - auto err = get_string(allow_replacement).get(content); - if (err) { return err; } - receiver = content; - return SUCCESS; + +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +constexpr void atom(string_builder &b, const T &t) { + b.escape_and_append_with_quotes(t); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { - return get_raw_json_string().unescape_wobbly(json_iter()); + +template + requires(!require_custom_serialization) +constexpr void atom(string_builder &b, const T &m) { + if (m.empty()) { + b.append_raw("{}"); + return; + } + b.append('{'); + bool first = true; + for (const auto& [key, value] : m) { + if (!first) { + b.append(','); + } + first = false; + // Keys must be convertible to string_view per the concept + b.escape_and_append_with_quotes(key); + b.append(':'); + atom(b, value); + } + b.append('}'); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { - auto json = peek_scalar("string"); - if (*json != '"') { return incorrect_type_error("Not a string"); } - advance_scalar("string"); - return raw_json_string(json+1); + + +template::value && !std::is_same_v>::type> +constexpr void atom(string_builder &b, const number_type t) { + b.append(t); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { - auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } - return result; + +template + requires(std::is_class_v && !concepts::container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && !require_custom_serialization) +constexpr void atom(string_builder &b, const T &t) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, t.[:dm:]); + i++; + }; + b.append('}'); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { - auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } - return result; + +// Support for optional types (std::optional, etc.) +template + requires(!require_custom_serialization) +constexpr void atom(string_builder &b, const T &opt) { + if (opt) { + atom(b, opt.value()); + } else { + b.append_raw("null"); + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { - auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } - return result; + +// Support for smart pointers (std::unique_ptr, std::shared_ptr, etc.) +template + requires(!require_custom_serialization) +constexpr void atom(string_builder &b, const T &ptr) { + if (ptr) { + atom(b, *ptr); + } else { + b.append_raw("null"); + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { - auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } - return result; + +// Support for enums - serialize as string representation using expand approach from P2996R12 +template + requires(std::is_enum_v && !require_custom_serialization) +void atom(string_builder &b, const T &e) { +#if SIMDJSON_STATIC_REFLECTION + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + constexpr auto enum_str = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(enum_val))); + if (e == [:enum_val:]) { + b.append_raw(enum_str); + return; + } + }; + // Fallback to integer if enum value not found + atom(b, static_cast>(e)); +#else + // Fallback: serialize as integer if reflection not available + atom(b, static_cast>(e)); +#endif } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { - auto result = numberparsing::parse_double(peek_non_root_scalar("double")); - if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } - return result; + +// Support for appendable containers that don't have operator[] (sets, etc.) +template + requires(!concepts::container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v && !require_custom_serialization) +constexpr void atom(string_builder &b, const T &container) { + if (container.empty()) { + b.append_raw("[]"); + return; + } + b.append('['); + bool first = true; + for (const auto& item : container) { + if (!first) { + b.append(','); + } + first = false; + atom(b, item); + } + b.append(']'); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { - auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); - if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } - return result; + +// append functions that delegate to atom functions for primitive types +template + requires(std::is_arithmetic_v && !std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { - auto result = parse_bool(peek_non_root_scalar("bool")); - if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } - return result; + +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_inline simdjson_result value_iterator::is_null() noexcept { - bool is_null_value; - SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); - if(is_null_value) { advance_non_root_scalar("null"); } - return is_null_value; + +template + requires(!require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_inline bool value_iterator::is_negative() noexcept { - return numberparsing::is_negative(peek_non_root_scalar("numbersign")); + +template + requires(!require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_inline bool value_iterator::is_root_negative() noexcept { - return numberparsing::is_negative(peek_root_scalar("numbersign")); + +template + requires(!concepts::container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v && !require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_inline simdjson_result value_iterator::is_integer() noexcept { - return numberparsing::is_integer(peek_non_root_scalar("integer")); + +template + requires(!require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +// works for struct +template + requires(std::is_class_v && !concepts::container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && !require_custom_serialization) +void append(string_builder &b, const Z &z) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^Z, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, z.[:dm:]); + i++; + }; + b.append('}'); } -simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { - return numberparsing::get_number_type(peek_non_root_scalar("integer")); + +// works for container that have begin() and end() iterators +template + requires(concepts::container_but_not_string && !require_custom_serialization) +void append(string_builder &b, const Z &z) { + auto it = z.begin(); + auto end = z.end(); + if (it == end) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, *it); + ++it; + for (; it != end; ++it) { + b.append(','); + atom(b, *it); + } + b.append(']'); } -simdjson_inline simdjson_result value_iterator::get_number() noexcept { - number num; - error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); - if(error) { return error; } - return num; + +template + requires (require_custom_serialization) +void append(string_builder &b, const Z &z) { + b.append(z); } -simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("is_root_integer"); - uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - return false; // if there are more than 20 characters, it cannot be represented as an integer. - } - auto answer = numberparsing::is_integer(tmpbuf); - // If the parsing was a success, we must still check that it is - // a single scalar. Note that we parse first because of cases like '[]' where - // getting TRAILING_CONTENT is wrong. - if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } - return answer; + +template +simdjson_warn_unused simdjson_result to_json_string(const Z &z, size_t initial_capacity = string_builder::DEFAULT_INITIAL_CAPACITY) { + string_builder b(initial_capacity); + append(b, z); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); } -simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("number"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - uint8_t tmpbuf[1074+8+1+1]; - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - if(numberparsing::check_if_integer(json, max_len)) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); - return number_type::big_integer; - } - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); - return NUMBER_ERROR; - } - auto answer = numberparsing::get_number_type(tmpbuf); - if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - return answer; +template +simdjson_warn_unused simdjson_error to_json(const Z &z, std::string &s, size_t initial_capacity = string_builder::DEFAULT_INITIAL_CAPACITY) { + string_builder b(initial_capacity); + append(b, z); + std::string_view view; + if(auto e = b.view().get(view); e) { return e; } + s.assign(view); + return SUCCESS; } -simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("number"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - // NOTE: the current approach doesn't work for very big integer numbers containing more than 1074 digits. - uint8_t tmpbuf[1074+8+1+1]; - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - if(numberparsing::check_if_integer(json, max_len)) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); - return BIGINT_ERROR; + +template +string_builder& operator<<(string_builder& b, const Z& z) { + append(b, z); + return b; +} + +// extract_from: Serialize only specific fields from a struct to JSON +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +void extract_from(string_builder &b, const T &obj) { + // Helper to check if a field name matches any of the requested fields + auto should_extract = [](std::string_view field_name) constexpr -> bool { + return ((FieldNames.view() == field_name) || ...); + }; + + b.append('{'); + bool first = true; + + // Iterate through all members of T using reflection + template for (constexpr auto mem : std::define_static_array( + std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + + if constexpr (std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + + // Only serialize this field if it's in our list of requested fields + if constexpr (should_extract(key)) { + if (!first) { + b.append(','); + } + first = false; + + // Serialize the key + constexpr auto quoted_key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(mem))); + b.append_raw(quoted_key); + b.append(':'); + + // Serialize the value + atom(b, obj.[:mem:]); + } } - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); - return NUMBER_ERROR; - } - number num; - error_code error = numberparsing::parse_number(tmpbuf, num); - if(error) { return error; } - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("number"); - return num; + }; + + b.append('}'); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { - return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); + +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_result extract_from(const T &obj, size_t initial_capacity = string_builder::DEFAULT_INITIAL_CAPACITY) { + string_builder b(initial_capacity); + extract_from(b, obj); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); } -template -simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept { - std::string_view content; - auto err = get_root_string(check_trailing, allow_replacement).get(content); - if (err) { return err; } - receiver = content; + +} // namespace builder +} // namespace haswell +// Alias the function template to 'to' in the global namespace +template +simdjson_warn_unused simdjson_result to_json(const Z &z, size_t initial_capacity = haswell::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + haswell::builder::string_builder b(initial_capacity); + haswell::builder::append(b, z); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); +} +template +simdjson_warn_unused simdjson_error to_json(const Z &z, std::string &s, size_t initial_capacity = haswell::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + haswell::builder::string_builder b(initial_capacity); + haswell::builder::append(b, z); + std::string_view view; + if(auto e = b.view().get(view); e) { return e; } + s.assign(view); return SUCCESS; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { - return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { - auto json = peek_scalar("string"); - if (*json != '"') { return incorrect_type_error("Not a string"); } - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_scalar("string"); - return raw_json_string(json+1); +// Global namespace function for extract_from +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_result extract_from(const T &obj, size_t initial_capacity = haswell::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + haswell::builder::string_builder b(initial_capacity); + haswell::builder::extract_from(b, obj); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("uint64"); - uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; + +} // namespace simdjson + +#endif // SIMDJSON_STATIC_REFLECTION + +#endif +/* end file simdjson/generic/ondemand/json_builder.h for haswell */ + +// JSON path accessor (compile-time) - must be after inline definitions +/* including simdjson/generic/ondemand/compile_time_accessors.h for haswell: #include "simdjson/generic/ondemand/compile_time_accessors.h" */ +/* begin file simdjson/generic/ondemand/compile_time_accessors.h for haswell */ +/** + * Compile-time JSON Path and JSON Pointer accessors using C++26 reflection (P2996) + * + * This file validates JSON paths/pointers against struct definitions at compile time + * and generates optimized accessor code with zero runtime overhead. + * + * ## How It Works + * + * **Compile Time**: Path is parsed, validated against struct, types are checked + * **Runtime**: Direct navigation with no parsing or validation overhead + * + * Example: + * ```cpp + * struct User { std::string name; std::vector emails; }; + * + * std::string email; + * path_accessor::extract_field(doc, email); + * + * // Compile time validates: + * // 1. User has "emails" field + * // 2. "emails" is array-like + * // 3. Element type is std::string + * // 4. static_assert(^^std::string == ^^std::string) + * + * // Runtime just navigates: + * // doc.get_object().find_field("emails").get_array().at(0).get(email) + * ``` + * + * ## Key Reflection APIs + * + * - `^^Type`: Reflect operator, converts type to std::meta::info + * - `std::meta::nonstatic_data_members_of(type)`: Get all fields of a struct + * - `std::meta::identifier_of(member)`: Get field name as string_view + * - `std::meta::type_of(member)`: Get reflected type of a field + * - `std::meta::is_array_type(type)`: Check if C-style array + * - `std::meta::remove_extent(array)`: Extract element type from array + * - `std::meta::members_of(type)`: Get all members including typedefs + * - `std::meta::is_type(member)`: Check if member is a type (vs field) + * + * All operations execute at compile time in consteval contexts. + */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_COMPILE_TIME_ACCESSORS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_COMPILE_TIME_ACCESSORS_H */ +/* amalgamation skipped (editor-only): */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// Arguably, we should just check SIMDJSON_STATIC_REFLECTION since it +// is unlikely that we will have reflection support without concepts support. +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + +#include +#include +#include + +namespace simdjson { +namespace haswell { +namespace ondemand { +/*** + * JSONPath implementation for compile-time access + * RFC 9535 JSONPath: Query Expressions for JSON, https://www.rfc-editor.org/rfc/rfc9535 + */ +namespace json_path { + +// Note: value type must be fully defined before this header is included +// This is ensured by including this in amalgamated.h after value-inl.h + +using ::simdjson::haswell::ondemand::value; + +// Path step types +enum class step_type { + field, // .field_name or ["field_name"] + array_index // [index] +}; + +// Represents a single step in a JSON path +template +struct path_step { + step_type type; + char key[N]; // Field name (empty for array indices) + std::size_t index; // Array index (0 for field access) + + constexpr path_step(step_type t, const char (&k)[N], std::size_t idx = 0) + : type(t), index(idx) { + for (std::size_t i = 0; i < N; ++i) { + key[i] = k[i]; + } } - auto result = numberparsing::parse_unsigned(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("uint64"); + + constexpr std::string_view key_view() const { + return {key, N - 1}; } - return result; +}; + +// Helper to create field step +template +consteval auto make_field_step(const char (&name)[N]) { + return path_step(step_type::field, name, 0); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("uint64"); - uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; + +// Helper to create array index step +consteval auto make_index_step(std::size_t idx) { + return path_step<1>(step_type::array_index, "", idx); +} + +// Parse state for compile-time JSON path parsing +struct parse_result { + bool success; + std::size_t pos; + std::string_view error_msg; +}; + +// Compile-time JSON path parser +// Supports subset: .field, ["field"], [index], nested combinations +template +struct json_path_parser { + static constexpr std::string_view path_str = Path.view(); + + // Skip leading $ if present + static consteval std::size_t skip_root() { + if (!path_str.empty() && path_str[0] == '$') { + return 1; + } + return 0; } - auto result = numberparsing::parse_unsigned_in_string(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("uint64"); + + // Count the number of steps in the path at compile time + static consteval std::size_t count_steps() { + std::size_t count = 0; + std::size_t i = skip_root(); + + while (i < path_str.size()) { + if (path_str[i] == '.') { + // Field access: .field + ++i; + if (i >= path_str.size()) break; + + // Skip field name + while (i < path_str.size() && path_str[i] != '.' && path_str[i] != '[') { + ++i; + } + ++count; + } else if (path_str[i] == '[') { + // Array or bracket notation + ++i; + if (i >= path_str.size()) break; + + if (path_str[i] == '"' || path_str[i] == '\'') { + // Field access: ["field"] or ['field'] + char quote = path_str[i]; + ++i; + while (i < path_str.size() && path_str[i] != quote) { + ++i; + } + if (i < path_str.size()) ++i; // skip closing quote + if (i < path_str.size() && path_str[i] == ']') ++i; + } else { + // Array index: [0], [123] + while (i < path_str.size() && path_str[i] != ']') { + ++i; + } + if (i < path_str.size()) ++i; // skip ] + } + ++count; + } else { + ++i; + } + } + + return count; + } + + // Parse a field name at compile time + static consteval std::size_t parse_field_name(std::size_t start, char* out, std::size_t max_len) { + std::size_t len = 0; + std::size_t i = start; + + while (i < path_str.size() && path_str[i] != '.' && path_str[i] != '[' && len < max_len - 1) { + out[len++] = path_str[i++]; + } + out[len] = '\0'; + return i; + } + + // Parse an array index at compile time + static consteval std::pair parse_array_index(std::size_t start) { + std::size_t index = 0; + std::size_t i = start; + + while (i < path_str.size() && path_str[i] >= '0' && path_str[i] <= '9') { + index = index * 10 + (path_str[i] - '0'); + ++i; + } + + return {i, index}; + } +}; + +// Compile-time path accessor generator +template +struct path_accessor { + using value = ::simdjson::haswell::ondemand::value; + + static constexpr auto parser = json_path_parser(); + static constexpr std::size_t num_steps = parser.count_steps(); + static constexpr std::string_view path_view = Path.view(); + + // Compile-time accessor generation + // If T is a struct, validates the path at compile time + // If T is void, skips validation + template + static inline simdjson_result access(DocOrValue& doc_or_val) noexcept { + // Validate path at compile time if T is a struct + if constexpr (std::is_class_v) { + constexpr bool path_valid = validate_path(); + static_assert(path_valid, "JSON path does not match struct definition"); + } + + // Parse the path at compile time to build access steps + return access_impl(doc_or_val.get_value()); + } + + // Extract value at path directly into target with compile-time type validation + // Example: std::string name; path_accessor::extract_field(doc, name); + template + static inline error_code extract_field(DocOrValue& doc_or_val, FieldType& target) noexcept { + static_assert(std::is_class_v, "extract_field requires T to be a struct type for validation"); + + // Validate path exists in struct definition + constexpr bool path_valid = validate_path(); + static_assert(path_valid, "JSON path does not match struct definition"); + + // Get the type at the end of the path + constexpr auto final_type = get_final_type(); + + // Verify target type matches the field type + static_assert(final_type == ^^FieldType, "Target type does not match the field type at the path"); + + // All validation done at compile time - just navigate and extract + auto json_value = access_impl(doc_or_val.get_value()); + if (json_value.error()) return json_value.error(); + + return json_value.get(target); + } + +private: + // Get the final type by walking the path through the struct type + template + static consteval std::enable_if_t, std::meta::info> get_final_type() { + auto current_type = ^^T; + std::size_t i = parser.skip_root(); + + while (i < path_view.size()) { + if (path_view[i] == '.') { + // .field syntax + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != '.' && path_view[i] != '[') { + ++i; + } + + std::string_view field_name = path_view.substr(field_start, i - field_start); + + auto members = std::meta::nonstatic_data_members_of( + current_type, std::meta::access_context::unchecked() + ); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + break; + } + } + + } else if (path_view[i] == '[') { + ++i; + if (i >= path_view.size()) break; + + if (path_view[i] == '"' || path_view[i] == '\'') { + // ["field"] syntax + char quote = path_view[i]; + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != quote) { + ++i; + } + + std::string_view field_name = path_view.substr(field_start, i - field_start); + if (i < path_view.size()) ++i; // skip quote + if (i < path_view.size() && path_view[i] == ']') ++i; + + auto members = std::meta::nonstatic_data_members_of( + current_type, std::meta::access_context::unchecked() + ); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + break; + } + } + + } else { + // [index] syntax - extract element type + while (i < path_view.size() && path_view[i] >= '0' && path_view[i] <= '9') { + ++i; + } + if (i < path_view.size() && path_view[i] == ']') ++i; + + current_type = get_element_type_reflected(current_type); + } + } else { + ++i; + } + } + + return current_type; } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("int64"); - uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; + +private: + // Walk path and extract directly into final field using compile-time reflection + template + static inline error_code extract_with_reflection(simdjson_result current, TargetType& target_ref) noexcept { + if (current.error()) return current.error(); + + // Base case: end of path - extract into target + if constexpr (PathPos >= path_view.size()) { + return current.get(target_ref); + } + // Field access: .field_name + else if constexpr (path_view[PathPos] == '.') { + constexpr auto field_info = parse_next_field(PathPos); + constexpr std::string_view field_name = std::get<0>(field_info); + constexpr std::size_t next_pos = std::get<1>(field_info); + + constexpr auto member_info = find_member_by_name(CurrentType, field_name); + static_assert(member_info != ^^void, "Field not found in struct"); + + constexpr auto member_type = std::meta::type_of(member_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + auto obj = obj_result.value_unsafe(); + auto field_value = obj.find_field_unordered(field_name); + + if constexpr (next_pos >= path_view.size()) { + return field_value.get(target_ref); + } else { + return extract_with_reflection(field_value, target_ref); + } + } + // Bracket notation: [index] or ["field"] + else if constexpr (path_view[PathPos] == '[') { + constexpr auto bracket_info = parse_bracket(PathPos); + constexpr bool is_field = std::get<0>(bracket_info); + constexpr std::size_t next_pos = std::get<2>(bracket_info); + + if constexpr (is_field) { + constexpr std::string_view field_name = std::get<1>(bracket_info); + constexpr auto member_info = find_member_by_name(CurrentType, field_name); + static_assert(member_info != ^^void, "Field not found in struct"); + constexpr auto member_type = std::meta::type_of(member_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + auto obj = obj_result.value_unsafe(); + auto field_value = obj.find_field_unordered(field_name); + + if constexpr (next_pos >= path_view.size()) { + return field_value.get(target_ref); + } else { + return extract_with_reflection(field_value, target_ref); + } + } else { + constexpr std::size_t index = std::get<3>(bracket_info); + constexpr auto elem_type = get_element_type_reflected(CurrentType); + static_assert(elem_type != ^^void, "Could not determine array element type"); + + auto arr_result = current.get_array(); + if (arr_result.error()) return arr_result.error(); + auto arr = arr_result.value_unsafe(); + auto elem_value = arr.at(index); + + if constexpr (next_pos >= path_view.size()) { + return elem_value.get(target_ref); + } else { + return extract_with_reflection(elem_value, target_ref); + } + } + } + // Skip unexpected characters and continue + else { + return extract_with_reflection(current, target_ref); + } } - auto result = numberparsing::parse_integer(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("int64"); + // Find member by name in reflected type + static consteval std::meta::info find_member_by_name(std::meta::info type_refl, std::string_view name) { + auto members = std::meta::nonstatic_data_members_of(type_refl, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::identifier_of(mem) == name) { + return mem; + } + } } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("int64"); - uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; + + // Generate compile-time accessor code by walking the path + template + static inline simdjson_result access_impl(simdjson_result current) noexcept { + if (current.error()) return current; + + if constexpr (PathPos >= path_view.size()) { + return current; + } else if constexpr (path_view[PathPos] == '.') { + constexpr auto field_info = parse_next_field(PathPos); + constexpr std::string_view field_name = std::get<0>(field_info); + constexpr std::size_t next_pos = std::get<1>(field_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + + auto obj = obj_result.value_unsafe(); + auto next_value = obj.find_field_unordered(field_name); + + return access_impl(next_value); + + } else if constexpr (path_view[PathPos] == '[') { + constexpr auto bracket_info = parse_bracket(PathPos); + constexpr bool is_field = std::get<0>(bracket_info); + constexpr std::size_t next_pos = std::get<2>(bracket_info); + + if constexpr (is_field) { + constexpr std::string_view field_name = std::get<1>(bracket_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + + auto obj = obj_result.value_unsafe(); + auto next_value = obj.find_field_unordered(field_name); + + return access_impl(next_value); + + } else { + constexpr std::size_t index = std::get<3>(bracket_info); + + auto arr_result = current.get_array(); + if (arr_result.error()) return arr_result.error(); + + auto arr = arr_result.value_unsafe(); + auto next_value = arr.at(index); + + return access_impl(next_value); + } + } else { + return access_impl(current); + } } - auto result = numberparsing::parse_integer_in_string(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("int64"); + // Parse next field name + static consteval auto parse_next_field(std::size_t start) { + std::size_t i = start + 1; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != '.' && path_view[i] != '[') { + ++i; + } + std::string_view field_name = path_view.substr(field_start, i - field_start); + return std::make_tuple(field_name, i); } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("double"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); - return NUMBER_ERROR; + + // Parse bracket notation: returns (is_field, field_name, next_pos, index) + static consteval auto parse_bracket(std::size_t start) { + std::size_t i = start + 1; // skip '[' + + if (i < path_view.size() && (path_view[i] == '"' || path_view[i] == '\'')) { + // Field access + char quote = path_view[i]; + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != quote) { + ++i; + } + std::string_view field_name = path_view.substr(field_start, i - field_start); + if (i < path_view.size()) ++i; // skip closing quote + if (i < path_view.size() && path_view[i] == ']') ++i; + + return std::make_tuple(true, field_name, i, std::size_t(0)); + } else { + // Array index + std::size_t index = 0; + while (i < path_view.size() && path_view[i] >= '0' && path_view[i] <= '9') { + index = index * 10 + (path_view[i] - '0'); + ++i; + } + if (i < path_view.size() && path_view[i] == ']') ++i; + + return std::make_tuple(false, std::string_view{}, i, index); + } } - auto result = numberparsing::parse_double(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("double"); + +public: + // Check if reflected type is array-like (C-style array or indexable container) + // Uses reflection to test: 1) std::meta::is_array_type() for C arrays + // 2) std::meta::substitute() to test concepts::indexable_container concept + static consteval bool is_array_like_reflected(std::meta::info type_reflection) { + if (std::meta::is_array_type(type_reflection)) { + return true; + } + + if (std::meta::can_substitute(^^concepts::indexable_container_v, {type_reflection})) { + return std::meta::extract(std::meta::substitute(^^concepts::indexable_container_v, {type_reflection})); + } + return false; } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("double"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); - return NUMBER_ERROR; + // Extract element type from reflected array or container + // For C arrays: uses std::meta::remove_extent() + // For containers: finds value_type member using std::meta::members_of() + static consteval std::meta::info get_element_type_reflected(std::meta::info type_reflection) { + if (std::meta::is_array_type(type_reflection)) { + return std::meta::remove_extent(type_reflection); + } + + auto members = std::meta::members_of(type_reflection, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::is_type(mem)) { + auto name = std::meta::identifier_of(mem); + if (name == "value_type") { + return mem; + } + } + } + return ^^void; } - auto result = numberparsing::parse_double_in_string(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("double"); + +private: + // Check if type has member with given name + template + static consteval bool has_member(std::string_view member_name) { + constexpr auto members = std::meta::nonstatic_data_members_of(^^Type, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::identifier_of(mem) == member_name) { + return true; + } + } + return false; } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("bool"); - // We have a boolean if we have either "true" or "false" and the next character is either - // a structural character or whitespace. We also check that the length is correct: - // "true" and "false" are 4 and 5 characters long, respectively. - bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && - (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); - bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && - (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); - if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("bool"); - return value_true; -} + // Get type of member by name + template + static consteval auto get_member_type(std::string_view member_name) { + constexpr auto members = std::meta::nonstatic_data_members_of(^^Type, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::identifier_of(mem) == member_name) { + return std::meta::type_of(mem); + } + } + return ^^void; + } -simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("null"); - bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && - (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); - if(result) { // we have something that looks like a null. - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("null"); - } else if (json[0] == 'n') { - return incorrect_type_error("Not a null but starts with n"); + // Check if non-reflected type is array-like + template + static consteval bool is_container_type() { + using BaseType = std::remove_cvref_t; + if constexpr (requires { typename BaseType::value_type; }) { + return true; + } + if constexpr (std::is_array_v) { + return true; + } + return false; } - return result; -} -simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); + // Extract element type from non-reflected container + template + using extract_element_type = std::conditional_t< + requires { typename std::remove_cvref_t::value_type; }, + typename std::remove_cvref_t::value_type, + std::conditional_t< + std::is_array_v>, + std::remove_extent_t>, + void + > + >; + + // Validate path matches struct definition + static consteval bool validate_path() { + if constexpr (!std::is_class_v) { + return true; + } - return _json_iter->skip_child(depth()); -} + auto current_type = ^^T; + std::size_t i = parser.skip_root(); -simdjson_inline value_iterator value_iterator::child() const noexcept { - assert_at_child(); - return { _json_iter, depth()+1, _json_iter->token.position() }; -} + while (i < path_view.size()) { + if (path_view[i] == '.') { + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != '.' && path_view[i] != '[') { + ++i; + } -// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller -// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is -// marked non-inline. -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_inline bool value_iterator::is_open() const noexcept { - return _json_iter->depth() >= depth(); -} -SIMDJSON_POP_DISABLE_WARNINGS + std::string_view field_name = path_view.substr(field_start, i - field_start); -simdjson_inline bool value_iterator::at_end() const noexcept { - return _json_iter->at_end(); -} + bool found = false; + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); -simdjson_inline bool value_iterator::at_start() const noexcept { - return _json_iter->token.position() == start_position(); -} + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + found = true; + break; + } + } -simdjson_inline bool value_iterator::at_first_field() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - return _json_iter->token.position() == start_position() + 1; -} + if (!found) { + return false; + } -simdjson_inline void value_iterator::abandon() noexcept { - _json_iter->abandon(); -} + } else if (path_view[i] == '[') { + ++i; + if (i >= path_view.size()) return false; -simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { - return _depth; -} -simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { - return _json_iter->error; -} -simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { - return _json_iter->string_buf_loc(); -} -simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { - return *_json_iter; -} -simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { - return *_json_iter; -} + if (path_view[i] == '"' || path_view[i] == '\'') { + char quote = path_view[i]; + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != quote) { + ++i; + } -simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { - return _json_iter->peek(start_position()); -} -simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { - return _json_iter->peek_length(start_position()); + std::string_view field_name = path_view.substr(field_start, i - field_start); + if (i < path_view.size()) ++i; + if (i < path_view.size() && path_view[i] == ']') ++i; + + bool found = false; + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + found = true; + break; + } + } + + if (!found) { + return false; + } + + } else { + while (i < path_view.size() && path_view[i] >= '0' && path_view[i] <= '9') { + ++i; + } + + if (i < path_view.size() && path_view[i] == ']') ++i; + + if (!is_array_like_reflected(current_type)) { + return false; + } + + auto new_type = get_element_type_reflected(current_type); + + if (new_type == ^^void) { + return false; + } + + current_type = new_type; + } + } else { + ++i; + } + } + + return true; + } +}; + +// Compile-time path accessor with validation +template +inline simdjson_result<::simdjson::haswell::ondemand::value> at_path_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = path_accessor; + return accessor::access(doc_or_val); } -simdjson_inline uint32_t value_iterator::peek_root_length() const noexcept { - return _json_iter->peek_root_length(start_position()); + +// Overload without type parameter (no validation) +template +inline simdjson_result<::simdjson::haswell::ondemand::value> at_path_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = path_accessor; + return accessor::access(doc_or_val); } -simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - // If we're not at the position anymore, we don't want to advance the cursor. - if (!is_at_start()) { return peek_start(); } +// ============================================================================ +// JSON Pointer Compile-Time Support (RFC 6901) +// ============================================================================ - // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. - assert_at_start(); - return _json_iter->peek(); -} +// JSON Pointer parser: /field/0/nested (slash-separated) +template +struct json_pointer_parser { + static constexpr std::string_view pointer_str = Pointer.view(); -simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - // If we're not at the position anymore, we don't want to advance the cursor. - if (!is_at_start()) { return; } + // Unescape token: ~0 -> ~, ~1 -> / + static consteval void unescape_token(std::string_view src, char* dest, std::size_t& out_len) { + out_len = 0; + for (std::size_t i = 0; i < src.size(); ++i) { + if (src[i] == '~' && i + 1 < src.size()) { + if (src[i + 1] == '0') { + dest[out_len++] = '~'; + ++i; + } else if (src[i + 1] == '1') { + dest[out_len++] = '/'; + ++i; + } else { + dest[out_len++] = src[i]; + } + } else { + dest[out_len++] = src[i]; + } + } + } - // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. - assert_at_start(); - _json_iter->return_current_and_advance(); - _json_iter->ascend_to(depth()-1); -} + // Check if token is numeric + static consteval bool is_numeric(std::string_view token) { + if (token.empty()) return false; + if (token[0] == '0' && token.size() > 1) return false; + for (char c : token) { + if (c < '0' || c > '9') return false; + } + return true; + } -simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { - logger::log_start_value(*_json_iter, start_position(), depth(), type); - // If we're not at the position anymore, we don't want to advance the cursor. - const uint8_t *json; - if (!is_at_start()) { -#if SIMDJSON_DEVELOPMENT_CHECKS - if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } -#endif - json = peek_start(); - if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } - } else { - assert_at_start(); - /** - * We should be prudent. Let us peek. If it is not the right type, we - * return an error. Only once we have determined that we have the right - * type are we allowed to advance! - */ - json = _json_iter->peek(); - if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } - _json_iter->return_current_and_advance(); + // Parse numeric token to index + static consteval std::size_t parse_index(std::string_view token) { + std::size_t result = 0; + for (char c : token) { + result = result * 10 + (c - '0'); + } + return result; } + // Count tokens in pointer + static consteval std::size_t count_tokens() { + if (pointer_str.empty() || pointer_str == "/") return 0; - return SUCCESS; -} + std::size_t count = 0; + std::size_t pos = pointer_str[0] == '/' ? 1 : 0; + while (pos < pointer_str.size()) { + ++count; + std::size_t next_slash = pointer_str.find('/', pos); + if (next_slash == std::string_view::npos) break; + pos = next_slash + 1; + } -simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return peek_start(); } + return count; + } - assert_at_root(); - return _json_iter->peek(); -} -simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return peek_start(); } + // Get Nth token + static consteval std::string_view get_token(std::size_t token_index) { + std::size_t pos = pointer_str[0] == '/' ? 1 : 0; + std::size_t current_token = 0; - assert_at_non_root_start(); - return _json_iter->peek(); -} + while (current_token < token_index) { + std::size_t next_slash = pointer_str.find('/', pos); + pos = next_slash + 1; + ++current_token; + } -simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return; } + std::size_t token_end = pointer_str.find('/', pos); + if (token_end == std::string_view::npos) token_end = pointer_str.size(); - assert_at_root(); - _json_iter->return_current_and_advance(); - _json_iter->ascend_to(depth()-1); -} -simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return; } + return pointer_str.substr(pos, token_end - pos); + } +}; - assert_at_non_root_start(); - _json_iter->return_current_and_advance(); - _json_iter->ascend_to(depth()-1); -} +// JSON Pointer accessor +template +struct pointer_accessor { + using parser = json_pointer_parser; + static constexpr std::string_view pointer_view = Pointer.view(); + static constexpr std::size_t token_count = parser::count_tokens(); -simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { - logger::log_error(*_json_iter, start_position(), depth(), message); - return INCORRECT_TYPE; -} + // Validate pointer against struct definition + static consteval bool validate_pointer() { + if constexpr (!std::is_class_v) { + return true; + } -simdjson_inline bool value_iterator::is_at_start() const noexcept { - return position() == start_position(); -} + auto current_type = ^^T; + std::size_t pos = pointer_view[0] == '/' ? 1 : 0; -simdjson_inline bool value_iterator::is_at_key() const noexcept { - // Keys are at the same depth as the object. - // Note here that we could be safer and check that we are within an object, - // but we do not. - return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; -} + while (pos < pointer_view.size()) { + // Extract token up to next / + std::size_t token_end = pointer_view.find('/', pos); + if (token_end == std::string_view::npos) token_end = pointer_view.size(); -simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { - // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). - auto delta = position() - start_position(); - return delta == 1 || delta == 2; -} + std::string_view token = pointer_view.substr(pos, token_end - pos); -inline void value_iterator::assert_at_start() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); - SIMDJSON_ASSUME( _depth > 0 ); -} + if (parser::is_numeric(token)) { + if (!path_accessor::is_array_like_reflected(current_type)) { + return false; + } + current_type = path_accessor::get_element_type_reflected(current_type); + } else { + bool found = false; + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == token) { + current_type = std::meta::type_of(mem); + found = true; + break; + } + } -inline void value_iterator::assert_at_container_start() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); - SIMDJSON_ASSUME( _depth > 0 ); -} + if (!found) return false; + } -inline void value_iterator::assert_at_next() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); - SIMDJSON_ASSUME( _depth > 0 ); -} + pos = token_end + 1; + } -simdjson_inline void value_iterator::move_at_start() noexcept { - _json_iter->_depth = _depth; - _json_iter->token.set_position(_start_position); -} + return true; + } -simdjson_inline void value_iterator::move_at_container_start() noexcept { - _json_iter->_depth = _depth; - _json_iter->token.set_position(_start_position + 1); -} + // Recursive accessor + template + static inline simdjson_result access_impl(simdjson_result current) noexcept { + if constexpr (TokenIndex >= token_count) { + return current; + } else { + constexpr std::string_view token = parser::get_token(TokenIndex); -simdjson_inline simdjson_result value_iterator::reset_array() noexcept { - if(error()) { return error(); } - move_at_container_start(); - return started_array(); -} + if constexpr (parser::is_numeric(token)) { + constexpr std::size_t index = parser::parse_index(token); + auto arr = current.get_array().value_unsafe(); + auto next_value = arr.at(index); + return access_impl(next_value); + } else { + auto obj = current.get_object().value_unsafe(); + auto next_value = obj.find_field_unordered(token); + return access_impl(next_value); + } + } + } -simdjson_inline simdjson_result value_iterator::reset_object() noexcept { - if(error()) { return error(); } - move_at_container_start(); - return started_object(); -} + // Access JSON value at pointer + template + static inline simdjson_result access(DocOrValue& doc_or_val) noexcept { + if constexpr (std::is_class_v) { + constexpr bool pointer_valid = validate_pointer(); + static_assert(pointer_valid, "JSON Pointer does not match struct definition"); + } -inline void value_iterator::assert_at_child() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); - SIMDJSON_ASSUME( _depth > 0 ); -} + if (pointer_view.empty() || pointer_view == "/") { + if constexpr (requires { doc_or_val.get_value(); }) { + return doc_or_val.get_value(); + } else { + return doc_or_val; + } + } -inline void value_iterator::assert_at_root() const noexcept { - assert_at_start(); - SIMDJSON_ASSUME( _depth == 1 ); -} + simdjson_result current = doc_or_val.get_value(); + return access_impl<0>(current); + } -inline void value_iterator::assert_at_non_root_start() const noexcept { - assert_at_start(); - SIMDJSON_ASSUME( _depth > 1 ); -} + // Extract value at pointer directly into target with type validation + template + static inline error_code extract_field(DocOrValue& doc_or_val, FieldType& target) noexcept { + static_assert(std::is_class_v, "extract_field requires T to be a struct type for validation"); -inline void value_iterator::assert_is_valid() const noexcept { - SIMDJSON_ASSUME( _json_iter != nullptr ); -} + constexpr bool pointer_valid = validate_pointer(); + static_assert(pointer_valid, "JSON Pointer does not match struct definition"); -simdjson_inline bool value_iterator::is_valid() const noexcept { - return _json_iter != nullptr; -} + constexpr auto final_type = get_final_type(); + static_assert(final_type == ^^FieldType, "Target type does not match the field type at the pointer"); -simdjson_inline simdjson_result value_iterator::type() const noexcept { - switch (*peek_start()) { - case '{': - return json_type::object; - case '[': - return json_type::array; - case '"': - return json_type::string; - case 'n': - return json_type::null; - case 't': case 'f': - return json_type::boolean; - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return json_type::number; - default: - return TAPE_ERROR; + simdjson_result current_value = doc_or_val.get_value(); + auto json_value = access_impl<0>(current_value); + if (json_value.error()) return json_value.error(); + + return json_value.get(target); } -} -simdjson_inline token_position value_iterator::start_position() const noexcept { - return _start_position; -} +private: + // Get final type by walking pointer through struct + template + static consteval std::enable_if_t, std::meta::info> get_final_type() { + auto current_type = ^^T; + std::size_t pos = pointer_view[0] == '/' ? 1 : 0; -simdjson_inline token_position value_iterator::position() const noexcept { - return _json_iter->position(); -} + while (pos < pointer_view.size()) { + std::size_t token_end = pointer_view.find('/', pos); + if (token_end == std::string_view::npos) token_end = pointer_view.size(); -simdjson_inline token_position value_iterator::end_position() const noexcept { - return _json_iter->end_position(); -} + std::string_view token = pointer_view.substr(pos, token_end - pos); -simdjson_inline token_position value_iterator::last_position() const noexcept { - return _json_iter->last_position(); + if (parser::is_numeric(token)) { + current_type = path_accessor::get_element_type_reflected(current_type); + } else { + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == token) { + current_type = std::meta::type_of(mem); + break; + } + } + } + + pos = token_end + 1; + } + + return current_type; + } +}; + +// Compile-time JSON Pointer accessor with validation +template +inline simdjson_result<::simdjson::haswell::ondemand::value> at_pointer_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = pointer_accessor; + return accessor::access(doc_or_val); } -simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { - return _json_iter->report_error(error, message); +// Overload without type parameter (no validation) +template +inline simdjson_result<::simdjson::haswell::ondemand::value> at_pointer_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = pointer_accessor; + return accessor::access(doc_or_val); } +} // namespace json_path } // namespace ondemand } // namespace haswell } // namespace simdjson -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::value_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/value_iterator-inl.h for haswell */ +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION +#endif // SIMDJSON_GENERIC_ONDEMAND_COMPILE_TIME_ACCESSORS_H +/* end file simdjson/generic/ondemand/compile_time_accessors.h for haswell */ /* end file simdjson/generic/ondemand/amalgamated.h for haswell */ /* including simdjson/haswell/end.h: #include "simdjson/haswell/end.h" */ @@ -65578,7 +78213,6 @@ namespace simd { friend simdjson_really_inline uint64_t operator==(const simd8 lhs, const simd8 rhs) { return _mm512_cmpeq_epi8_mask(lhs, rhs); } - static const int SIZE = sizeof(base::value); template @@ -65897,7 +78531,7 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 64; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } @@ -65921,6 +78555,35 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 64; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(uint64_t(escape_bits)); } + + __mmask64 escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + __mmask64 is_quote = _mm512_cmpeq_epi8_mask(v, _mm512_set1_epi8('"')); + __mmask64 is_backslash = _mm512_cmpeq_epi8_mask(v, _mm512_set1_epi8('\\')); + __mmask64 is_control = _mm512_cmplt_epi8_mask(v, _mm512_set1_epi8(32)); + return { + (is_backslash | is_quote | is_control) + }; +} + + + + } // unnamed namespace } // namespace icelake } // namespace simdjson @@ -66047,7 +78710,7 @@ class value_iterator; /* end file simdjson/generic/ondemand/base.h for icelake */ /* including simdjson/generic/ondemand/deserialize.h for icelake: #include "simdjson/generic/ondemand/deserialize.h" */ /* begin file simdjson/generic/ondemand/deserialize.h for icelake */ -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS #ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -66056,55 +78719,8 @@ class value_iterator; /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include namespace simdjson { -namespace tag_invoke_fn_ns { -void tag_invoke(); - -struct tag_invoke_fn { - template - requires requires(Tag tag, Args &&...args) { - tag_invoke(std::forward(tag), std::forward(args)...); - } - constexpr auto operator()(Tag tag, Args &&...args) const - noexcept(noexcept(tag_invoke(std::forward(tag), - std::forward(args)...))) - -> decltype(tag_invoke(std::forward(tag), - std::forward(args)...)) { - return tag_invoke(std::forward(tag), std::forward(args)...); - } -}; -} // namespace tag_invoke_fn_ns - -inline namespace tag_invoke_ns { -inline constexpr tag_invoke_fn_ns::tag_invoke_fn tag_invoke = {}; -} // namespace tag_invoke_ns - -template -concept tag_invocable = requires(Tag tag, Args... args) { - tag_invoke(std::forward(tag), std::forward(args)...); -}; - -template -concept nothrow_tag_invocable = - tag_invocable && requires(Tag tag, Args... args) { - { - tag_invoke(std::forward(tag), std::forward(args)...) - } noexcept; - }; - -template -using tag_invoke_result = - std::invoke_result; - -template -using tag_invoke_result_t = - std::invoke_result_t; - -template using tag_t = std::decay_t; - - struct deserialize_tag; /// These types are deserializable in a built-in way @@ -66126,7 +78742,7 @@ template concept custom_deserializable = tag_invocable; template -concept deserializable = custom_deserializable || is_builtin_deserializable_v; +concept deserializable = custom_deserializable || is_builtin_deserializable_v || concepts::optional_type; template concept nothrow_custom_deserializable = nothrow_tag_invocable; @@ -66137,28 +78753,44 @@ concept nothrow_deserializable = nothrow_custom_deserializable || is_bu /// Deserialize Tag inline constexpr struct deserialize_tag { + using array_type = icelake::ondemand::array; + using object_type = icelake::ondemand::object; using value_type = icelake::ondemand::value; using document_type = icelake::ondemand::document; using document_reference_type = icelake::ondemand::document_reference; + // Customization Point for array + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(array_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for object + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(object_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + // Customization Point for value template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } // Customization Point for document template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } // Customization Point for document reference template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } @@ -66168,7 +78800,7 @@ inline constexpr struct deserialize_tag { } // namespace simdjson #endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS /* end file simdjson/generic/ondemand/deserialize.h for icelake */ /* including simdjson/generic/ondemand/value_iterator.h for icelake: #include "simdjson/generic/ondemand/value_iterator.h" */ @@ -66510,7 +79142,7 @@ class value_iterator { simdjson_warn_unused simdjson_inline simdjson_result get_root_number(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result is_root_null(bool check_trailing) noexcept; - simdjson_inline error_code error() const noexcept; + simdjson_warn_unused simdjson_inline error_code error() const noexcept; simdjson_inline uint8_t *&string_buf_loc() noexcept; simdjson_inline const json_iterator &json_iter() const noexcept; simdjson_inline json_iterator &json_iter() noexcept; @@ -66594,8 +79226,8 @@ class value_iterator { simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; - simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; - simdjson_inline error_code end_container() noexcept; + simdjson_warn_unused simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container() noexcept; /** * Advance to a place expecting a value (increasing depth). @@ -66605,8 +79237,8 @@ class value_iterator { */ simdjson_inline simdjson_result advance_to_value() noexcept; - simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; - simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; + simdjson_warn_unused simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; + simdjson_warn_unused simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; simdjson_inline bool is_at_start() const noexcept; /** @@ -66643,7 +79275,7 @@ class value_iterator { /** @copydoc error_code json_iterator::end_position() const noexcept; */ simdjson_inline token_position end_position() const noexcept; /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ - simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + simdjson_warn_unused simdjson_inline error_code report_error(error_code error, const char *message) noexcept; friend class document; friend class object; @@ -66680,6 +79312,7 @@ struct simdjson_result : public icelake::impl /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include @@ -66708,13 +79341,14 @@ class value { * * You may use get_double(), get_bool(), get_uint64(), get_int64(), * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * When SIMDJSON_SUPPORTS_CONCEPTS is set, custom types are also supported. * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ template simdjson_inline simdjson_result get() -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -66731,22 +79365,38 @@ class value { * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * If the macro SIMDJSON_SUPPORTS_CONCEPTS is set, then custom types are also supported. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template - simdjson_inline error_code get(T &out) -#if SIMDJSON_SUPPORTS_DESERIALIZATION + simdjson_warn_unused simdjson_inline error_code get(T &out) +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { - #if SIMDJSON_SUPPORTS_DESERIALIZATION + #if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); + } else if constexpr (concepts::optional_type) { + using value_type = typename std::remove_cvref_t::value_type; + + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullopt + return SUCCESS; + } + + if (!out) { + out.emplace(); + } + return get(out.value()); } else { static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " "And you do not seem to have added support for it. Indeed, we have that " @@ -66756,7 +79406,7 @@ class value { static_cast(out); // to get rid of unused errors return UNINITIALIZED; } -#else // SIMDJSON_SUPPORTS_DESERIALIZATION +#else // SIMDJSON_SUPPORTS_CONCEPTS // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -66874,7 +79524,7 @@ class value { * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. */ template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; /** * Cast this JSON value to a "wobbly" string. @@ -67051,12 +79701,14 @@ class value { */ simdjson_inline simdjson_result at(size_t index) noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -67085,7 +79737,8 @@ class value { * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the default because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -67328,6 +79981,14 @@ class value { */ simdjson_inline simdjson_result at_path(std::string_view at_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * Supports wildcard character (*) for arrays or ".*" for objects. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern + */ + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; protected: /** @@ -67395,7 +80056,7 @@ struct simdjson_result : public icelake::implementatio simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -67424,12 +80085,14 @@ struct simdjson_result : public icelake::implementatio simdjson_inline simdjson_result end() & noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -67456,7 +80119,8 @@ struct simdjson_result : public icelake::implementatio * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the defaul because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -67476,7 +80140,22 @@ struct simdjson_result : public icelake::implementatio simdjson_result operator[](int) noexcept = delete; /** - * Get the type of this JSON value. + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * Given a valid JSON document, the answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just @@ -67500,6 +80179,7 @@ struct simdjson_result : public icelake::implementatio simdjson_inline simdjson_result current_depth() const noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; }; } // namespace simdjson @@ -67970,14 +80650,14 @@ class json_iterator { * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ - simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + simdjson_warn_unused simdjson_inline error_code report_error(error_code error, const char *message) noexcept; /** * Log error, but don't stop iteration. * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ - simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; + simdjson_warn_unused simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; /** * Take an input in json containing max_len characters and attempt to copy it over to tmpbuf, a buffer with @@ -67997,7 +80677,7 @@ class json_iterator { simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; - simdjson_inline error_code consume_character(char c) noexcept; + simdjson_warn_unused simdjson_inline error_code consume_character(char c) noexcept; #if SIMDJSON_DEVELOPMENT_CHECKS simdjson_inline token_position start_position(depth_t depth) const noexcept; simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; @@ -68088,6 +80768,7 @@ namespace ondemand { * The type of a JSON value. */ enum class json_type { + unknown=0, // Start at 1 to catch uninitialized / default values more easily array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) object, ///< A JSON object ( { "a": 1, "b" 2, ... } ) @@ -68294,6 +80975,12 @@ class raw_json_string { */ simdjson_inline const char * raw() const noexcept; + /** + * Get the character at index i. This is unchecked. + * [0] when the string is of length 0 returns the final quote ("). + */ + simdjson_inline char operator[](size_t i) const noexcept; + /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done) on target.size() characters, @@ -68433,10 +81120,10 @@ struct simdjson_result : public icelake::imp simdjson_inline ~simdjson_result() noexcept = default; ///< @private simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline char operator[](size_t) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape(icelake::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(icelake::ondemand::json_iterator &iter) const noexcept; }; - } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H @@ -68452,6 +81139,7 @@ struct simdjson_result : public icelake::imp /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include +#include namespace simdjson { namespace icelake { @@ -68570,7 +81258,9 @@ class parser { simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; - /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + /** @overload simdjson_result iterate(padded_string_view json) & noexcept + The string instance might be have its capacity extended. Note that this can still + result in AddressSanitizer: container-overflow in some cases. */ simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; @@ -68658,6 +81348,11 @@ class parser { * Setting batch_size to excessively large or excessively small values may impact negatively the * performance. * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what @@ -68665,10 +81360,10 @@ class parser { * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * - * ### Threads + * This is checked automatically with all iterate_many function calls, except for the two + * that take pointers (const char* or const uint8_t*). * - * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the - * hood to do some lookahead. + * ### Threads * * ### Parser Capacity * @@ -68694,14 +81389,16 @@ class parser { */ inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(padded_string_view json, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) + the string might be automatically padded with up to SIMDJSON_PADDING whitespace characters */ + inline simdjson_result iterate_many(std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe - /** @private We do not want to allow implicit conversion from C string to std::string. */ simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; @@ -68803,13 +81500,39 @@ class parser { bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; #endif + /** + * Get a unique parser instance corresponding to the current thread. + * This instance can be safely used within the current thread, but it should + * not be passed to other threads. + * + * A parser should only be used for one document at a time. + * + * Our simdjson::from functions use this parser instance. + * + * You can free the related parser by calling release_parser(). + */ + static simdjson_inline simdjson_warn_unused ondemand::parser& get_parser(); + /** + * Release the parser instance initialized by get_parser() and all the + * associated resources (memory). Returns true if a parser instance + * was released. + */ + static simdjson_inline bool release_parser(); + private: + friend bool release_parser(); + friend ondemand::parser& get_parser(); + /** Get the thread-local parser instance, allocates it if needed */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_parser_instance(); + /** Get the thread-local parser instance, it might be null */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_threadlocal_parser_if_exists(); /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; std::unique_ptr string_buf{}; + #if SIMDJSON_DEVELOPMENT_CHECKS std::unique_ptr start_positions{}; #endif @@ -68837,6 +81560,314 @@ struct simdjson_result : public icelake::implementati #endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H /* end file simdjson/generic/ondemand/parser.h for icelake */ +// JSON builder - needed for extract_into functionality +/* including simdjson/generic/ondemand/json_string_builder.h for icelake: #include "simdjson/generic/ondemand/json_string_builder.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder.h for icelake */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + + +#if SIMDJSON_SUPPORTS_CONCEPTS + +namespace icelake { +namespace builder { + class string_builder; +}} + +template +struct has_custom_serialization : std::false_type {}; + +inline constexpr struct serialize_tag { + template + constexpr void operator()(icelake::builder::string_builder& b, T&& obj) const{ + return tag_invoke(*this, b, std::forward(obj)); + } + + +} serialize{}; +template +struct has_custom_serialization(), std::declval())) +>> : std::true_type {}; + +template +constexpr bool require_custom_serialization = has_custom_serialization::value; +#else +struct has_custom_serialization : std::false_type {}; +#endif // SIMDJSON_SUPPORTS_CONCEPTS + +namespace icelake { +namespace builder { +/** + * A builder for JSON strings representing documents. This is a low-level + * builder that is not meant to be used directly by end-users. Though it + * supports atomic types (Booleans, strings), it does not support composed + * types (arrays and objects). + * + * Ultimately, this class can support kernel-specific optimizations. E.g., + * it may make use of SIMD instructions to escape strings faster. + */ +class string_builder { +public: + simdjson_inline string_builder(size_t initial_capacity = DEFAULT_INITIAL_CAPACITY); + + static constexpr size_t DEFAULT_INITIAL_CAPACITY = 1024; + + /** + * Append number (includes Booleans). Booleans are mapped to the strings + * false and true. Numbers are converted to strings abiding by the JSON standard. + * Floating-point numbers are converted to the shortest string that 'correctly' + * represents the number. + */ + template::value>::type> + simdjson_inline void append(number_type v) noexcept; + + /** + * Append character c. + */ + simdjson_inline void append(char c) noexcept; + + /** + * Append the string 'null'. + */ + simdjson_inline void append_null() noexcept; + + /** + * Clear the content. + */ + simdjson_inline void clear() noexcept; + + /** + * Append the std::string_view, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append(std::string_view input) noexcept; + + /** + * Append the std::string_view surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(std::string_view input) noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + template + simdjson_inline void escape_and_append_with_quotes() noexcept; +#endif + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(char input) noexcept; + + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(const char* input) noexcept; + + /** + * Append the C string directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *c) noexcept; + + /** + * Append "{" to the buffer. + */ + simdjson_inline void start_object() noexcept; + + /** + * Append "}" to the buffer. + */ + simdjson_inline void end_object() noexcept; + + /** + * Append "[" to the buffer. + */ + simdjson_inline void start_array() noexcept; + + /** + * Append "]" to the buffer. + */ + simdjson_inline void end_array() noexcept; + + /** + * Append "," to the buffer. + */ + simdjson_inline void append_comma() noexcept; + + /** + * Append ":" to the buffer. + */ + simdjson_inline void append_colon() noexcept; + + /** + * Append a key-value pair to the buffer. + * The key is escaped and surrounded by double quotes. + * The value is escaped if it is a string. + */ + template + simdjson_inline void append_key_value(key_type key, value_type value) noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + template + simdjson_inline void append_key_value(value_type value) noexcept; + + // Support for optional types (std::optional, etc.) + template + requires(!require_custom_serialization) + simdjson_inline void append(const T &opt); + + template + requires(require_custom_serialization) + simdjson_inline void append(T &&val); + + // Support for string-like types + template + requires(std::is_convertible::value || + std::is_same::value ) + simdjson_inline void append(const T &value); +#endif +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS + // Support for range-based appending (std::ranges::view, etc.) + template +requires (!std::is_convertible::value && !require_custom_serialization) + simdjson_inline void append(const R &range) noexcept; +#endif + /** + * Append the std::string_view directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(std::string_view input) noexcept; + + /** + * Append len characters from str. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *str, size_t len) noexcept; +#if SIMDJSON_EXCEPTIONS + /** + * Creates an std::string from the written JSON buffer. + * Throws if memory allocation failed + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string() const noexcept(false); + + /** + * Creates an std::string_view from the written JSON buffer. + * Throws if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string_view() const noexcept(false) simdjson_lifetime_bound; +#endif + + /** + * Returns a view on the written JSON buffer. Returns an error + * if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result view() const noexcept; + + /** + * Appends the null character to the buffer and returns + * a pointer to the beginning of the written JSON buffer. + * Returns an error if memory allocation failed. + * The result is null-terminated. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result c_str() noexcept; + + /** + * Return true if the content is valid UTF-8. + */ + simdjson_inline bool validate_unicode() const noexcept; + + /** + * Returns the current size of the written JSON buffer. + * If an error occurred, returns 0. + */ + simdjson_inline size_t size() const noexcept; + +private: + /** + * Returns true if we can write at least upcoming_bytes bytes. + * The underlying buffer is reallocated if needed. It is designed + * to be called before writing to the buffer. It should be fast. + */ + simdjson_inline bool capacity_check(size_t upcoming_bytes); + + /** + * Grow the buffer to at least desired_capacity bytes. + * If the allocation fails, is_valid is set to false. We expect + * that this function would not be repeatedly called. + */ + simdjson_inline void grow_buffer(size_t desired_capacity); + + /** + * We use this helper function to make sure that is_valid is kept consistent. + */ + simdjson_inline void set_valid(bool valid) noexcept; + + std::unique_ptr buffer{}; + size_t position{0}; + size_t capacity{0}; + bool is_valid{true}; +}; + + + +} +} + + +#if !SIMDJSON_STATIC_REFLECTION +// fallback implementation until we have static reflection +template +simdjson_warn_unused simdjson_result to_json(const Z &z, size_t initial_capacity = simdjson::icelake::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + simdjson::icelake::builder::string_builder b(initial_capacity); + b.append(z); + std::string_view s; + auto e = b.view().get(s); + if(e) { return e; } + return std::string(s); +} +template +simdjson_warn_unused simdjson_error to_json(const Z &z, std::string &s, size_t initial_capacity = simdjson::icelake::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + simdjson::icelake::builder::string_builder b(initial_capacity); + b.append(z); + std::string_view sv; + auto e = b.view().get(sv); + if(e) { return e; } + s.assign(sv.data(), sv.size()); + return simdjson::SUCCESS; +} +#endif + +#if SIMDJSON_SUPPORTS_CONCEPTS +#endif // SIMDJSON_SUPPORTS_CONCEPTS + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_STRING_BUILDER_H +/* end file simdjson/generic/ondemand/json_string_builder.h for icelake */ + // All other declarations /* including simdjson/generic/ondemand/array.h for icelake: #include "simdjson/generic/ondemand/array.h" */ /* begin file simdjson/generic/ondemand/array.h for icelake */ @@ -68847,6 +81878,7 @@ struct simdjson_result : public icelake::implementati /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { @@ -68949,7 +81981,7 @@ class array { * JSONPath queries that trivially convertible to JSON Pointer queries: key * names and array indices. * - * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * https://www.rfc-editor.org/rfc/rfc9535 (RFC 9535) * * @return The value associated with the given JSONPath expression, or: * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails @@ -68959,6 +81991,15 @@ class array { */ inline simdjson_result at_path(std::string_view json_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * Supports wildcard patterns like "[*]" to match all array elements. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern + */ + inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; + /** * Consumes the array and returns a string_view instance corresponding to the * array as represented in JSON. It points inside the original document. @@ -68973,11 +82014,42 @@ class array { * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) noexcept; + +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this array as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON array is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_warn_unused simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the array, no matter where you are right now. */ - simdjson_inline error_code consume() noexcept; + simdjson_warn_unused simdjson_inline error_code consume() noexcept; /** * Begin array iteration. @@ -69050,8 +82122,30 @@ struct simdjson_result : public icelake::implementatio simdjson_inline simdjson_result at(size_t index) noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_warn_unused simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -69096,7 +82190,8 @@ class array_iterator { * * Part of the std::iterator interface. */ - simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result + operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. /** * Check if we are at the end of the JSON. * @@ -69120,6 +82215,11 @@ class array_iterator { */ simdjson_inline array_iterator &operator++() noexcept; + /** + * Check if the array is at the end. + */ + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; + private: value_iterator iter{}; @@ -69138,7 +82238,6 @@ namespace simdjson { template<> struct simdjson_result : public icelake::implementation_simdjson_result_base { -public: simdjson_inline simdjson_result(icelake::ondemand::array_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -69151,6 +82250,8 @@ struct simdjson_result : public icelake::impl simdjson_inline bool operator==(const simdjson_result &) const noexcept; simdjson_inline bool operator!=(const simdjson_result &) const noexcept; simdjson_inline simdjson_result &operator++() noexcept; + + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; }; } // namespace simdjson @@ -69167,6 +82268,7 @@ struct simdjson_result : public icelake::impl /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ @@ -69278,7 +82380,7 @@ class document { * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. */ template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; /** * Cast this JSON value to a string. * @@ -69344,7 +82446,7 @@ class document { */ template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -69367,7 +82469,7 @@ class document { */ template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -69385,18 +82487,18 @@ class document { * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. - * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns INCORRECT_TYPE If the JSON value is of the given type. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template - simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION + simdjson_warn_unused simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { @@ -69408,7 +82510,7 @@ class document { static_cast(out); // to get rid of unused errors return UNINITIALIZED; } -#else // SIMDJSON_SUPPORTS_DESERIALIZATION +#else // SIMDJSON_SUPPORTS_CONCEPTS // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -69418,7 +82520,7 @@ class document { " You may also add support for custom types, see our documentation."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS } /** @overload template error_code get(T &out) & noexcept */ @@ -69483,7 +82585,7 @@ class document { * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a raw_json_string. * @@ -69492,7 +82594,7 @@ class document { * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a bool. * @@ -69563,12 +82665,14 @@ class document { simdjson_inline simdjson_result end() & noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -69607,7 +82711,8 @@ class document { * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the default because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -69642,11 +82747,27 @@ class document { * E.g., you must still call "is_null()" to check that a value is null even if * "type()" returns json_type::null. * + * The answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. + * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). * - * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + * Prior to simdjson 4.0, this function would return an error given a bad token. + * Starting with simdjson 4.0, it will return simdjson::ondemand::json_type::unknown. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. */ simdjson_inline simdjson_result type() noexcept; @@ -69846,7 +82967,7 @@ class document { * JSONPath queries that trivially convertible to JSON Pointer queries: key * names and array indices. * - * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * https://www.rfc-editor.org/rfc/rfc9535 (RFC 9535) * * Key values are matched exactly, without unescaping or Unicode normalization. * We do a byte-by-byte comparison. E.g. @@ -69864,17 +82985,64 @@ class document { */ simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * + * Supports wildcard patterns like "$.array[*]" or "$.object.*" to match multiple elements. + * + * This method materializes all matching values into a vector. + * The document will be consumed after this call. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern, or: + * - INVALID_JSON_POINTER if the JSONPath cannot be parsed + * - NO_SUCH_FIELD if a field does not exist + * - INDEX_OUT_OF_BOUNDS if an array index is out of bounds + * - INCORRECT_TYPE if path traversal encounters wrong type + */ + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; + /** * Consumes the document and returns a string_view instance corresponding to the * document as represented in JSON. It points inside the original byte array containing * the JSON document. */ simdjson_inline simdjson_result raw_json() noexcept; + +#if SIMDJSON_STATIC_REFLECTION + /** + * Extract only specific fields from the JSON object into a struct. + * + * This allows selective deserialization of only the fields you need, + * potentially improving performance by skipping unwanted fields. + * + * Example: + * ```cpp + * struct Car { + * std::string make; + * std::string model; + * int year; + * double price; + * }; + * + * Car car; + * doc.extract_into<"make", "model">(car); + * // Only 'make' and 'model' fields are extracted from JSON + * ``` + * + * @tparam FieldNames Compile-time string literals specifying which fields to extract + * @param out The output struct to populate with selected fields + * @returns SUCCESS on success, or an error code if a required field is missing or has wrong type + */ + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION protected: /** * Consumes the document. */ - simdjson_inline error_code consume() noexcept; + simdjson_warn_unused simdjson_inline error_code consume() noexcept; simdjson_inline document(ondemand::json_iterator &&iter) noexcept; simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; @@ -69927,7 +83095,7 @@ class document_reference { simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -69936,7 +83104,7 @@ class document_reference { simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -69949,7 +83117,7 @@ class document_reference { } template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -69971,14 +83139,14 @@ class document_reference { * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template - simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION + simdjson_warn_unused simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { @@ -69990,7 +83158,7 @@ class document_reference { static_cast(out); // to get rid of unused errors return UNINITIALIZED; } -#else // SIMDJSON_SUPPORTS_DESERIALIZATION +#else // SIMDJSON_SUPPORTS_CONCEPTS // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -70000,12 +83168,17 @@ class document_reference { " You may also add support for custom types, see our documentation."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS } /** @overload template error_code get(T &out) & noexcept */ template simdjson_inline error_code get(T &out) && noexcept; simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_STATIC_REFLECTION + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION simdjson_inline operator document&() const noexcept; #if SIMDJSON_EXCEPTIONS template @@ -70046,6 +83219,7 @@ class document_reference { simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; private: document *doc{nullptr}; @@ -70074,7 +83248,7 @@ struct simdjson_result : public icelake::implementa simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -70087,6 +83261,9 @@ struct simdjson_result : public icelake::implementa template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS + + using icelake::implementation_simdjson_result_base::operator*; + using icelake::implementation_simdjson_result_base::operator->; template ::value == false>::type> explicit simdjson_inline operator T() noexcept(false); simdjson_inline operator icelake::ondemand::array() & noexcept(false); @@ -70126,6 +83303,12 @@ struct simdjson_result : public icelake::implementa simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; +#if SIMDJSON_STATIC_REFLECTION + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION }; @@ -70152,7 +83335,7 @@ struct simdjson_result : public icelake:: simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -70203,6 +83386,12 @@ struct simdjson_result : public icelake:: simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; +#if SIMDJSON_STATIC_REFLECTION + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION }; @@ -70295,7 +83484,7 @@ class document_stream { /** * Construct an uninitialized document_stream. * - * ```c++ + * ```cpp * document_stream docs; * auto error = parser.iterate_many(json).get(docs); * ``` @@ -70345,6 +83534,7 @@ class document_stream { * Default constructor. */ simdjson_inline iterator() noexcept; + simdjson_inline iterator(const iterator &other) noexcept = default; /** * Get the current document (or error). */ @@ -70358,6 +83548,7 @@ class document_stream { * @param other the end iterator to compare to. */ simdjson_inline bool operator!=(const iterator &other) const noexcept; + simdjson_inline bool operator==(const iterator &other) const noexcept; /** * @private * @@ -70401,6 +83592,11 @@ class document_stream { */ inline error_code error() const noexcept; + /** + * Returns whether the iterator is at the end. + */ + inline bool at_end() const noexcept; + private: simdjson_inline iterator(document_stream *s, bool finished) noexcept; /** The document_stream we're iterating through. */ @@ -70412,6 +83608,7 @@ class document_stream { friend class document_stream; friend class json_iterator; }; + using iterator = document_stream::iterator; /** * Start iterating the documents in the stream. @@ -70675,6 +83872,10 @@ struct simdjson_result : public icelake::implementatio /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION && SIMDJSON_SUPPORTS_CONCEPTS */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_string_builder.h" // for constevalutil::fixed_string */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { @@ -70696,12 +83897,14 @@ class object { simdjson_inline simdjson_result begin() noexcept; simdjson_inline simdjson_result end() noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -70744,7 +83947,8 @@ class object { * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the default because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -70827,6 +84031,15 @@ class object { */ inline simdjson_result at_path(std::string_view json_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * Supports wildcard patterns like ".*" to match all object fields. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern + */ + inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; + /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you @@ -70872,11 +84085,71 @@ class object { */ simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this object as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON object is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_warn_unused simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } + +#if SIMDJSON_STATIC_REFLECTION + /** + * Extract only specific fields from the JSON object into a struct. + * + * This allows selective deserialization of only the fields you need, + * potentially improving performance by skipping unwanted fields. + * + * Example: + * ```cpp + * struct Car { + * std::string make; + * std::string model; + * int year; + * double price; + * }; + * + * Car car; + * object.extract_into<"make", "model">(car); + * // Only 'make' and 'model' fields are extracted from JSON + * ``` + * + * @tparam FieldNames Compile-time string literals specifying which fields to extract + * @param out The output struct to populate with selected fields + * @returns SUCCESS on success, or an error code if a required field is missing or has wrong type + */ + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the object, no matter where you are right now. */ - simdjson_inline error_code consume() noexcept; + simdjson_warn_unused simdjson_inline error_code consume() noexcept; static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; @@ -70915,12 +84188,43 @@ struct simdjson_result : public icelake::implementati simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; - + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; inline simdjson_result reset() noexcept; inline simdjson_result is_empty() noexcept; inline simdjson_result count_fields() & noexcept; inline simdjson_result raw_json() noexcept; + #if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_warn_unused simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#if SIMDJSON_STATIC_REFLECTION + // TODO: move this code into object-inl.h + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) noexcept { + if (error()) { return error(); } + return first.extract_into(out); + } +#endif // SIMDJSON_STATIC_REFLECTION +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -71049,6 +84353,20 @@ inline simdjson_result to_json_string(simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); + +#if SIMDJSON_STATIC_REFLECTION +/** + * Create a JSON string from any user-defined type using static reflection. + * Only available when SIMDJSON_STATIC_REFLECTION is enabled. + */ +template + requires(!std::same_as && + !std::same_as && + !std::same_as && + !std::same_as) +inline std::string to_json_string(const T& obj); +#endif + } // namespace simdjson /** @@ -71120,28 +84438,30 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result #include +#if SIMDJSON_STATIC_REFLECTION +#include +// #include // for std::define_static_string - header not available yet +#endif namespace simdjson { -template -constexpr bool require_custom_serialization = false; ////////////////////////////// // Number deserialization ////////////////////////////// template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { using limits = std::numeric_limits; @@ -71155,7 +84475,6 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { } template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { double x; SIMDJSON_TRY(val.get_double().get(x)); @@ -71164,7 +84483,6 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { } template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { using limits = std::numeric_limits; @@ -71177,8 +84495,23 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { return SUCCESS; } +////////////////////////////// +// String deserialization +////////////////////////////// + +// just a character! +error_code tag_invoke(deserialize_tag, auto &val, char &out) noexcept { + std::string_view x; + SIMDJSON_TRY(val.get_string().get(x)); + if(x.size() != 1) { + return INCORRECT_TYPE; + } + out = x[0]; + return SUCCESS; +} + +// any string-like type (can be constructed from std::string_view) template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { std::string_view str; SIMDJSON_TRY(val.get_string().get(str)); @@ -71195,7 +84528,6 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothr * doc.get>(). */ template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { using value_type = typename std::remove_cvref_t::value_type; static_assert( @@ -71204,9 +84536,13 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { static_assert( std::is_default_constructible_v, "The specified type inside the container must default constructible."); - icelake::ondemand::array arr; - SIMDJSON_TRY(val.get_array().get(arr)); + if constexpr (std::is_same_v, icelake::ondemand::array>) { + arr = val; + } else { + SIMDJSON_TRY(val.get_array().get(arr)); + } + for (auto v : arr) { if constexpr (concepts::returns_reference) { if (auto const err = v.get().get(concepts::emplace_one(out)); @@ -71237,7 +84573,6 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { * string-keyed types. */ template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { using value_type = typename std::remove_cvref_t::mapped_type; static_assert( @@ -71263,7 +84598,45 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { return SUCCESS; } +template +error_code tag_invoke(deserialize_tag, icelake::ondemand::object &obj, T &out) noexcept { + using value_type = typename std::remove_cvref_t::mapped_type; + + out.clear(); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + + icelake::ondemand::value value_obj; + SIMDJSON_TRY(field.value().get(value_obj)); + + value_type this_value; + SIMDJSON_TRY(value_obj.get(this_value)); + out.emplace(typename T::key_type(key), std::move(this_value)); + } + return SUCCESS; +} + +template +error_code tag_invoke(deserialize_tag, icelake::ondemand::value &val, T &out) noexcept { + icelake::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + +template +error_code tag_invoke(deserialize_tag, icelake::ondemand::document &doc, T &out) noexcept { + icelake::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} +template +error_code tag_invoke(deserialize_tag, icelake::ondemand::document_reference &doc, T &out) noexcept { + icelake::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} /** @@ -71281,7 +84654,6 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { * @return status of the conversion */ template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::element_type, ValT>) { using element_type = typename std::remove_cvref_t::element_type; @@ -71306,17 +84678,17 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser /** * This CPO (Customization Point Object) will help deserialize into optional types. */ -template - requires(!require_custom_serialization) -error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::value_type, ValT>) { +template +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept(nothrow_deserializable::value_type, decltype(val)>) { using value_type = typename std::remove_cvref_t::value_type; - static_assert( - deserializable, - "The specified type inside the unique_ptr must itself be deserializable"); - static_assert( - std::is_default_constructible_v, - "The specified type inside the unique_ptr must default constructible."); + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullopt + return SUCCESS; + } if (!out) { out.emplace(); @@ -71325,10 +84697,329 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser return SUCCESS; } + +#if SIMDJSON_STATIC_REFLECTION + + +template +constexpr bool user_defined_type = (std::is_class_v +&& !std::is_same_v && !std::is_same_v && !concepts::optional_type && +!concepts::appendable_containers); + + +template + requires(user_defined_type && std::is_class_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { + icelake::ondemand::object obj; + if constexpr (std::is_same_v, icelake::ondemand::object>) { + obj = val; + } else { + SIMDJSON_TRY(val.get_object().get(obj)); + } + template for (constexpr auto mem : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + if constexpr (concepts::optional_type) { + // for optional members, it's ok if the key is missing + auto error = obj[key].get(out.[:mem:]); + if (error && error != NO_SUCH_FIELD) { + if(error == NO_SUCH_FIELD) { + out.[:mem:].reset(); + continue; + } + return error; + } + } else { + // for non-optional members, the key must be present + SIMDJSON_TRY(obj[key].get(out.[:mem:])); + } + } + }; + return simdjson::SUCCESS; +} + +// Support for enum deserialization - deserialize from string representation using expand approach from P2996R12 +template + requires(std::is_enum_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { +#if SIMDJSON_STATIC_REFLECTION + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + if (str == std::meta::identifier_of(enum_val)) { + out = [:enum_val:]; + return SUCCESS; + } + }; + + return INCORRECT_TYPE; +#else + // Fallback: deserialize as integer if reflection not available + std::underlying_type_t int_val; + SIMDJSON_TRY(val.get(int_val)); + out = static_cast(int_val); + return SUCCESS; +#endif +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::unique_ptr &out) noexcept { + if (!out) { + out = std::make_unique(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::shared_ptr &out) noexcept { + if (!out) { + out = std::make_shared(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +#endif // SIMDJSON_STATIC_REFLECTION + +//////////////////////////////////////// +// Unique pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Shared pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Explicit optional specializations +//////////////////////////////////////// + +//////////////////////////////////////// +// Explicit smart pointer specializations for string and int types +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_shared(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + int64_t temp; + SIMDJSON_TRY(val.get_int64().get(temp)); + *out = static_cast(temp); + return SUCCESS; +} + } // namespace simdjson #endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS /* end file simdjson/generic/ondemand/std_deserialize.h for icelake */ // Inline definitions @@ -71421,7 +85112,7 @@ simdjson_inline simdjson_result array::begin() noexcept { simdjson_inline simdjson_result array::end() noexcept { return array_iterator(iter); } -simdjson_inline error_code array::consume() noexcept { +simdjson_warn_unused simdjson_warn_unused simdjson_inline error_code array::consume() noexcept { auto error = iter.json_iter().skip_child(iter.depth()-1); if(error) { iter.abandon(); } return error; @@ -71506,6 +85197,67 @@ inline simdjson_result array::at_path(std::string_view json_path) noexcep return at_pointer(json_pointer); } +inline simdjson_result> array::at_path_with_wildcard(std::string_view json_path) noexcept { + std::vector result; + + auto result_pair = get_next_key_and_json_path(json_path); + std::string_view key = result_pair.first; + std::string_view remaining_path = result_pair.second; + // Wildcard case + if(key=="*"){ + for(auto element: *this){ + + if(element.error()){ + return element.error(); + } + + if(remaining_path.empty()){ + // Use value_unsafe() because we've already checked for errors above. + // The 'element' is a simdjson_result wrapper, and we need to extract + // the underlying value. value_unsafe() is safe here because error() returned false. + result.push_back(std::move(element).value_unsafe()); + + }else{ + auto nested_result = element.at_path_with_wildcard(remaining_path); + + if(nested_result.error()){ + return nested_result.error(); + } + // Same logic as above. + std::vector nested_matches = std::move(nested_result).value_unsafe(); + + result.insert(result.end(), + std::make_move_iterator(nested_matches.begin()), + std::make_move_iterator(nested_matches.end())); + } + } + return result; + }else{ + // Specific index case in which we access the element at the given index + size_t idx=0; + + for(char c:key){ + if(c < '0' || c > '9'){ + return INVALID_JSON_POINTER; + } + idx = idx*10 + (c - '0'); + } + + auto element = at(idx); + + if(element.error()){ + return element.error(); + } + + if(remaining_path.empty()){ + result.push_back(std::move(element).value_unsafe()); + return result; + }else{ + return element.at_path_with_wildcard(remaining_path); + } + } +} + simdjson_inline simdjson_result array::at(size_t index) noexcept { size_t i = 0; for (auto value : *this) { @@ -71564,6 +85316,10 @@ simdjson_inline simdjson_result simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path_with_wildcard(json_path); +} simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { if (error()) { return error(); } return first.raw_json(); @@ -71612,6 +85368,9 @@ simdjson_inline array_iterator &array_iterator::operator++() noexcept { return *this; } +simdjson_inline bool array_iterator::at_end() const noexcept { + return iter.at_end(); +} } // namespace ondemand } // namespace icelake } // namespace simdjson @@ -71648,7 +85407,9 @@ simdjson_inline simdjson_result &simdjson_res ++(first); return *this; } - +simdjson_inline bool simdjson_result::at_end() const noexcept { + return !first.iter.is_valid() || first.at_end(); +} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H @@ -71705,7 +85466,7 @@ simdjson_inline simdjson_result value::get_string(bool allow_r return iter.get_string(allow_replacement); } template -simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_warn_unused simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { return iter.get_string(receiver, allow_replacement); } simdjson_inline simdjson_result value::get_wobbly_string() noexcept { @@ -71747,15 +85508,15 @@ template<> simdjson_inline simdjson_result value::get() noexcept { retu template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } -template<> simdjson_inline error_code value::get(array& out) noexcept { return get_array().get(out); } -template<> simdjson_inline error_code value::get(object& out) noexcept { return get_object().get(out); } -template<> simdjson_inline error_code value::get(raw_json_string& out) noexcept { return get_raw_json_string().get(out); } -template<> simdjson_inline error_code value::get(std::string_view& out) noexcept { return get_string(false).get(out); } -template<> simdjson_inline error_code value::get(number& out) noexcept { return get_number().get(out); } -template<> simdjson_inline error_code value::get(double& out) noexcept { return get_double().get(out); } -template<> simdjson_inline error_code value::get(uint64_t& out) noexcept { return get_uint64().get(out); } -template<> simdjson_inline error_code value::get(int64_t& out) noexcept { return get_int64().get(out); } -template<> simdjson_inline error_code value::get(bool& out) noexcept { return get_bool().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(array& out) noexcept { return get_array().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(object& out) noexcept { return get_object().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(raw_json_string& out) noexcept { return get_raw_json_string().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(std::string_view& out) noexcept { return get_string(false).get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(number& out) noexcept { return get_number().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(double& out) noexcept { return get_double().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(uint64_t& out) noexcept { return get_uint64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(int64_t& out) noexcept { return get_int64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(bool& out) noexcept { return get_bool().get(out); } #if SIMDJSON_EXCEPTIONS template @@ -71953,6 +85714,19 @@ simdjson_inline simdjson_result value::at_path(std::string_view json_path } } +inline simdjson_result> value::at_path_with_wildcard(std::string_view json_path) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path_with_wildcard(json_path); + case json_type::object: + return (*this).get_object().at_path_with_wildcard(json_path); + default: + return INVALID_JSON_POINTER; + } +} + } // namespace ondemand } // namespace icelake } // namespace simdjson @@ -72203,6 +85977,14 @@ simdjson_inline simdjson_result simdjson_result> simdjson_result::at_path_with_wildcard( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path_with_wildcard(json_path); +} + } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H @@ -72352,7 +86134,7 @@ simdjson_inline simdjson_result document::get_string(bool allo return get_root_value_iterator().get_root_string(true, allow_replacement); } template -simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_warn_unused simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept { return get_root_value_iterator().get_root_string(receiver, true, allow_replacement); } simdjson_inline simdjson_result document::get_wobbly_string() noexcept { @@ -72378,15 +86160,15 @@ template<> simdjson_inline simdjson_result document::get() & noexcept { template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } -template<> simdjson_inline error_code document::get(array& out) & noexcept { return get_array().get(out); } -template<> simdjson_inline error_code document::get(object& out) & noexcept { return get_object().get(out); } -template<> simdjson_inline error_code document::get(raw_json_string& out) & noexcept { return get_raw_json_string().get(out); } -template<> simdjson_inline error_code document::get(std::string_view& out) & noexcept { return get_string(false).get(out); } -template<> simdjson_inline error_code document::get(double& out) & noexcept { return get_double().get(out); } -template<> simdjson_inline error_code document::get(uint64_t& out) & noexcept { return get_uint64().get(out); } -template<> simdjson_inline error_code document::get(int64_t& out) & noexcept { return get_int64().get(out); } -template<> simdjson_inline error_code document::get(bool& out) & noexcept { return get_bool().get(out); } -template<> simdjson_inline error_code document::get(value& out) & noexcept { return get_value().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(array& out) & noexcept { return get_array().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(object& out) & noexcept { return get_object().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(raw_json_string& out) & noexcept { return get_raw_json_string().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(std::string_view& out) & noexcept { return get_string(false).get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(double& out) & noexcept { return get_double().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(uint64_t& out) & noexcept { return get_uint64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(int64_t& out) & noexcept { return get_int64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(bool& out) & noexcept { return get_bool().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(value& out) & noexcept { return get_value().get(out); } template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } @@ -72406,8 +86188,8 @@ simdjson_inline document::operator object() & noexcept(false) { return get_objec simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline document::operator double() noexcept(false) { return get_double(); } -simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } -simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator std::string_view() noexcept(false) simdjson_lifetime_bound { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) simdjson_lifetime_bound { return get_raw_json_string(); } simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } simdjson_inline document::operator value() noexcept(false) { return get_value(); } @@ -72449,1141 +86231,1961 @@ simdjson_inline simdjson_result document::find_field_unordered(std::strin simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { return start_or_resume_object().find_field_unordered(key); } -simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { - return start_or_resume_object()[key]; +simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { + return start_or_resume_object()[key]; +} + +simdjson_warn_unused simdjson_inline error_code document::consume() noexcept { + bool scalar = false; + auto error = is_scalar().get(scalar); + if(error) { return error; } + if(scalar) { + iter.return_current_and_advance(); + return SUCCESS; + } + error = iter.skip_child(0); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result document::raw_json() noexcept { + auto _iter = get_root_value_iterator(); + const uint8_t * starting_point{_iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter.unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result document::type() noexcept { + return get_root_value_iterator().type(); +} + +simdjson_inline simdjson_result document::is_scalar() noexcept { + // For more speed, we could do: + // return iter.is_single_token(); + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline simdjson_result document::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} + +simdjson_inline bool document::is_negative() noexcept { + return get_root_value_iterator().is_root_negative(); +} + +simdjson_inline simdjson_result document::is_integer() noexcept { + return get_root_value_iterator().is_root_integer(true); +} + +simdjson_inline simdjson_result document::get_number_type() noexcept { + return get_root_value_iterator().get_root_number_type(true); +} + +simdjson_inline simdjson_result document::get_number() noexcept { + return get_root_value_iterator().get_root_number(true); +} + + +simdjson_inline simdjson_result document::raw_json_token() noexcept { + auto _iter = get_root_value_iterator(); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_root_length()); +} + +simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_pointer.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result document::at_path(std::string_view json_path) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_path.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result> document::at_path_with_wildcard(std::string_view json_path) noexcept { + rewind(); // Rewind the document each time at_path_with_wildcard is called + if (json_path.empty()) { + return INVALID_JSON_POINTER; + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path_with_wildcard(json_path); + case json_type::object: + return (*this).get_object().at_path_with_wildcard(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code document::extract_into(T& out) & noexcept { + // Helper to check if a field name matches any of the requested fields + auto should_extract = [](std::string_view field_name) constexpr -> bool { + return ((FieldNames.view() == field_name) || ...); + }; + + // Iterate through all members of T using reflection + template for (constexpr auto mem : std::define_static_array( + std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + + // Only extract this field if it's in our list of requested fields + if constexpr (should_extract(key)) { + // Try to find and extract the field + if constexpr (concepts::optional_type) { + // For optional fields, it's ok if they're missing + auto field_result = find_field_unordered(key); + if (!field_result.error()) { + auto error = field_result.get(out.[:mem:]); + if (error && error != NO_SUCH_FIELD) { + return error; + } + } else if (field_result.error() != NO_SUCH_FIELD) { + return field_result.error(); + } else { + out.[:mem:].reset(); + } + } else { + // For required fields (in the requested list), fail if missing + SIMDJSON_TRY((*this)[key].get(out.[:mem:])); + } + } + } + }; + + return SUCCESS; +} + +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + icelake::ondemand::document &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base( + error + ) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first); +} +template<> simdjson_warn_unused simdjson_inline error_code simdjson_result::get(icelake::ondemand::document &out) & noexcept = delete; +template<> simdjson_warn_unused simdjson_inline error_code simdjson_result::get(icelake::ondemand::document &out) && noexcept { + if (error()) { return error(); } + out = std::forward(first); + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} + +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} + +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} + +simdjson_inline bool simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} + +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} + +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} + +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} + + +#if SIMDJSON_EXCEPTIONS +template ::value == false>::type> +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator icelake::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator icelake::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator icelake::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } -simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { - return start_or_resume_object()[key]; +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } - -simdjson_inline error_code document::consume() noexcept { - bool scalar = false; - auto error = is_scalar().get(scalar); - if(error) { return error; } - if(scalar) { - iter.return_current_and_advance(); - return SUCCESS; - } - error = iter.skip_child(0); - if(error) { iter.abandon(); } - return error; +simdjson_inline simdjson_result::operator icelake::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } +#endif -simdjson_inline simdjson_result document::raw_json() noexcept { - auto _iter = get_root_value_iterator(); - const uint8_t * starting_point{_iter.peek_start()}; - auto error = consume(); - if(error) { return error; } - // After 'consume()', we could be left pointing just beyond the document, but that - // is ok because we are not going to dereference the final pointer position, we just - // use it to compute the length in bytes. - const uint8_t * final_point{iter.unsafe_pointer()}; - return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); } -simdjson_inline simdjson_result document::type() noexcept { - return get_root_value_iterator().type(); +simdjson_inline bool simdjson_result::at_end() const noexcept { + if (error()) { return error(); } + return first.at_end(); } -simdjson_inline simdjson_result document::is_scalar() noexcept { - // For more speed, we could do: - // return iter.is_single_token(); - json_type this_type; - auto error = type().get(this_type); - if(error) { return error; } - return ! ((this_type == json_type::array) || (this_type == json_type::object)); + +simdjson_inline int32_t simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); } -simdjson_inline simdjson_result document::is_string() noexcept { - json_type this_type; - auto error = type().get(this_type); - if(error) { return error; } - return (this_type == json_type::string); +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); } -simdjson_inline bool document::is_negative() noexcept { - return get_root_value_iterator().is_root_negative(); +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); } -simdjson_inline simdjson_result document::is_integer() noexcept { - return get_root_value_iterator().is_root_integer(true); +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); } -simdjson_inline simdjson_result document::get_number_type() noexcept { - return get_root_value_iterator().get_root_number_type(true); +simdjson_inline simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path_with_wildcard(json_path); } -simdjson_inline simdjson_result document::get_number() noexcept { - return get_root_value_iterator().get_root_number(true); +#if SIMDJSON_STATIC_REFLECTION +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code simdjson_result::extract_into(T& out) & noexcept { + if (error()) { return error(); } + return first.extract_into(out); } +#endif // SIMDJSON_STATIC_REFLECTION +} // namespace simdjson -simdjson_inline simdjson_result document::raw_json_token() noexcept { - auto _iter = get_root_value_iterator(); - return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_root_length()); -} -simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { - rewind(); // Rewind the document each time at_pointer is called - if (json_pointer.empty()) { - return this->get_value(); - } - json_type t; - SIMDJSON_TRY(type().get(t)); - switch (t) - { - case json_type::array: - return (*this).get_array().at_pointer(json_pointer); - case json_type::object: - return (*this).get_object().at_pointer(json_pointer); - default: - return INVALID_JSON_POINTER; - } -} +namespace simdjson { +namespace icelake { +namespace ondemand { -simdjson_inline simdjson_result document::at_path(std::string_view json_path) noexcept { - rewind(); // Rewind the document each time at_pointer is called - if (json_path.empty()) { - return this->get_value(); - } - json_type t; - SIMDJSON_TRY(type().get(t)); - switch (t) { - case json_type::array: - return (*this).get_array().at_path(json_path); - case json_type::object: - return (*this).get_object().at_path(json_path); - default: - return INVALID_JSON_POINTER; - } +simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} +simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} +simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } +simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } +simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } +/** + * The document_reference instances are used primarily/solely for streams of JSON + * documents. + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. + * + * However, for streams of JSON documents, we want to be able to start from + * "321" "321" "321" + * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() + * successfully each time. + * + * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: + * this indicates that we allow trailing content. + */ +simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } +simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } +simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } +template +simdjson_warn_unused simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } +simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } +simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } +simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } +simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } +simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_value(); } +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline document_reference::operator T() noexcept(false) { return get(); } +simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } +simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } +simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } +simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } +simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } +#endif +simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } +simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } +simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } +simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } +simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } +simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } +simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_inline simdjson_result document_reference::is_string() noexcept { return doc->is_string(); } +simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } +simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } +simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } +simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } +simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } +simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } +simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } +simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_inline simdjson_result document_reference::at_path(std::string_view json_path) noexcept { return doc->at_path(json_path); } +simdjson_inline simdjson_result> document_reference::at_path_with_wildcard(std::string_view json_path) noexcept { return doc->at_path_with_wildcard(json_path); } +simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} +simdjson_inline document_reference::operator document&() const noexcept { return *doc; } +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code document_reference::extract_into(T& out) & noexcept { + return doc->extract_into(out); } - +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION } // namespace ondemand } // namespace icelake } // namespace simdjson + + namespace simdjson { +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::document_reference value, error_code error) + noexcept : implementation_simdjson_result_base(std::forward(value), error) {} -simdjson_inline simdjson_result::simdjson_result( - icelake::ondemand::document &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ -} -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base( - error - ) -{ -} -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } -simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { if (error()) { return error(); } return first.at(index); } -simdjson_inline error_code simdjson_result::rewind() noexcept { +simdjson_inline error_code simdjson_result::rewind() noexcept { if (error()) { return error(); } first.rewind(); return SUCCESS; } -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_inline simdjson_result simdjson_result::end() & noexcept { +simdjson_inline simdjson_result simdjson_result::end() & noexcept { return {}; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { if (error()) { return error(); } return first.get_array(); } -simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { if (error()) { return error(); } return first.get_object(); } -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_warn_unused simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } -simdjson_inline simdjson_result simdjson_result::get_value() noexcept { +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { if (error()) { return error(); } return first.get_value(); } -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +template <> +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(icelake::ondemand::document_reference &out) & noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} +template <> +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(icelake::ondemand::document_reference &out) && noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline simdjson_result::operator T() noexcept(false) { + static_assert(std::is_same::value == false, "You should not call get when T is a document"); + static_assert(std::is_same::value == false, "You should not call get when T is a document"); + if (error()) { throw simdjson_error(error()); } + return first.get(); +} +simdjson_inline simdjson_result::operator icelake::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator icelake::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator icelake::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator icelake::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} +simdjson_inline simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path_with_wildcard(json_path); +} +#if SIMDJSON_STATIC_REFLECTION +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code simdjson_result::extract_into(T& out) & noexcept { + if (error()) { return error(); } + return first.extract_into(out); +} +#endif // SIMDJSON_STATIC_REFLECTION +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H +/* end file simdjson/generic/ondemand/document-inl.h for icelake */ +/* including simdjson/generic/ondemand/document_stream-inl.h for icelake: #include "simdjson/generic/ondemand/document_stream-inl.h" */ +/* begin file simdjson/generic/ondemand/document_stream-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace icelake { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. + std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); +} + +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); +} + +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). + if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); +} + + +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. + can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } +} + +inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); +} + +#endif // SIMDJSON_THREADS_ENABLED + +simdjson_inline document_stream::document_stream( + ondemand::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size, + bool _allow_comma_separated +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? MINIMAL_BATCH_SIZE : _batch_size}, + allow_comma_separated{_allow_comma_separated}, + error{SUCCESS} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change + #endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif +} -template -simdjson_inline simdjson_result simdjson_result::get() & noexcept { - if (error()) { return error(); } - return first.get(); -} -template -simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first).get(); -} -template -simdjson_inline error_code simdjson_result::get(T &out) & noexcept { - if (error()) { return error(); } - return first.get(out); -} -template -simdjson_inline error_code simdjson_result::get(T &out) && noexcept { - if (error()) { return error(); } - return std::forward(first).get(out); +simdjson_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + allow_comma_separated{false}, + error{UNINITIALIZED} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) + #endif +{ } -template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; -template<> simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first); -} -template<> simdjson_inline error_code simdjson_result::get(icelake::ondemand::document &out) & noexcept = delete; -template<> simdjson_inline error_code simdjson_result::get(icelake::ondemand::document &out) && noexcept { - if (error()) { return error(); } - out = std::forward(first); - return SUCCESS; +simdjson_inline document_stream::~document_stream() noexcept +{ + #ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); + #endif } -simdjson_inline simdjson_result simdjson_result::type() noexcept { - if (error()) { return error(); } - return first.type(); +inline size_t document_stream::size_in_bytes() const noexcept { + return len; } -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { - if (error()) { return error(); } - return first.is_scalar(); +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; } -simdjson_inline simdjson_result simdjson_result::is_string() noexcept { - if (error()) { return error(); } - return first.is_string(); +simdjson_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { } -simdjson_inline bool simdjson_result::is_negative() noexcept { - if (error()) { return error(); } - return first.is_negative(); +simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { } -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { - if (error()) { return error(); } - return first.is_integer(); +simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { + return simdjson_result(stream->doc, stream->error); } -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { - if (error()) { return error(); } - return first.get_number_type(); +simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) + // + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. + // + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->error() is guarded against error conditions + // (it will immediately return if stream->error casts to false). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. + return *this; } -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { - if (error()) { return error(); } - return first.get_number(); +simdjson_inline bool document_stream::iterator::at_end() const noexcept { + return finished; } -#if SIMDJSON_EXCEPTIONS -template ::value == false>::type> -simdjson_inline simdjson_result::operator T() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator icelake::ondemand::array() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator icelake::ondemand::object() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator double() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; } -simdjson_inline simdjson_result::operator icelake::ondemand::raw_json_string() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline bool document_stream::iterator::operator==(const document_stream::iterator &other) const noexcept { + return finished == other.finished; } -simdjson_inline simdjson_result::operator bool() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished. + return iterator(this, error == EMPTY); } -simdjson_inline simdjson_result::operator icelake::ondemand::value() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); } -#endif +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->allocate(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } + doc_index = batch_start; + doc = document(json_iterator(&buf[batch_start], parser)); + doc.iter._streaming = true; -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { - if (error()) { return error(); } - return first.current_location(); + #ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread on next batch if needed + error = stage1_thread_parser.allocate(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } + } + #endif // SIMDJSON_THREADS_ENABLED } -simdjson_inline bool simdjson_result::at_end() const noexcept { - if (error()) { return error(); } - return first.at_end(); -} +inline void document_stream::next() noexcept { + // We always enter at once once in an error condition. + if (error) { return; } + next_document(); + if (error) { return; } + auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); + doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; + // Check if at end of structural indexes (i.e. at end of batch) + if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { + error = EMPTY; + // Load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + #ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } + #else + error = run_stage1(*parser, batch_start); + #endif + /** + * Whenever we move to another window, we need to update all pointers to make + * it appear as if the input buffer started at the beginning of the window. + * + * Take this input: + * + * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] + * + * Say you process the following window... + * + * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' + * + * When you do so, the json_iterator has a pointer at the beginning of the memory region + * (pointing at the beginning of '{"z"...'. + * + * When you move to the window that starts at... + * + * '[7, 10, 9] [15, 11, 12, 13] ... + * + * then it is not sufficient to just run stage 1. You also need to re-anchor the + * json_iterator so that it believes we are starting at '[7, 10, 9]...'. + * + * Under the DOM front-end, this gets done automatically because the parser owns + * the pointer the data, and when you call stage1 and then stage2 on the same + * parser, then stage2 will run on the pointer acquired by stage1. + * + * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that + * we used. But json_iterator has no callback when stage1 is called on the parser. + * In fact, I think that the parser is unaware of json_iterator. + * + * + * So we need to re-anchor the json_iterator after each call to stage 1 so that + * all of the pointers are in sync. + */ + doc.iter = json_iterator(&buf[batch_start], parser); + doc.iter._streaming = true; + /** + * End of resync. + */ -simdjson_inline int32_t simdjson_result::current_depth() const noexcept { - if (error()) { return error(); } - return first.current_depth(); + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. + doc_index = batch_start; + } + } } -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { - if (error()) { return error(); } - return first.raw_json_token(); +inline void document_stream::next_document() noexcept { + // Go to next place where depth=0 (document depth) + error = doc.iter.skip_child(0); + if (error) { return; } + // Always set depth=1 at the start of document + doc.iter._depth = 1; + // consume comma if comma separated is allowed + if (allow_comma_separated) { + error_code ignored = doc.iter.consume_character(','); + static_cast(ignored); // ignored on purpose + } + // Resets the string buffer at the beginning, thus invalidating the strings. + doc.iter._string_buf_loc = parser->string_buf.get(); + doc.iter._root = doc.iter.position(); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; } -simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { - if (error()) { return error(); } - return first.at_path(json_path); +inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { + // This code only updates the structural index in the parser, it does not update any json_iterator + // instance. + size_t remaining = len - _batch_start; + if (remaining <= batch_size) { + return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); + } else { + return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); + } } -} // namespace simdjson - +simdjson_inline size_t document_stream::iterator::current_index() const noexcept { + return stream->doc_index; +} -namespace simdjson { -namespace icelake { -namespace ondemand { +simdjson_inline std::string_view document_stream::iterator::source() const noexcept { + auto depth = stream->doc.iter.depth(); + auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); -simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} -simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} -simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } -simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } -simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } -/** - * The document_reference instances are used primarily/solely for streams of JSON - * documents. - * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should - * give an error, so we check for trailing content. - * - * However, for streams of JSON documents, we want to be able to start from - * "321" "321" "321" - * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() - * successfully each time. - * - * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: - * this indicates that we allow trailing content. - */ -simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } -simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } -simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } -simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } -simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } -simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } -simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } -template -simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } -simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } -simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } -simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } -simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } -simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_array(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_object(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_raw_json_string(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_string(false); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_double(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_uint64(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_int64(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_bool(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_value(); } -#if SIMDJSON_EXCEPTIONS -template -simdjson_inline document_reference::operator T() noexcept(false) { return get(); } -simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } -simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } -simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } -simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } -simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } -simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } -simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } -simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } -simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } -#endif -simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } -simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } -simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } -simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } -simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } -simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } -simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } -simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } -simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } -simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } -simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } -simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } -simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } -simdjson_inline simdjson_result document_reference::is_string() noexcept { return doc->is_string(); } -simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } -simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } -simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } -simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } -simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } -simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } -simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } -simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } -simdjson_inline simdjson_result document_reference::at_path(std::string_view json_path) noexcept { return doc->at_path(json_path); } -simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} -simdjson_inline document_reference::operator document&() const noexcept { return *doc; } + // If at root, process the first token to determine if scalar value + if (stream->doc.iter.at_root()) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': // Depth=1 already at start of document + break; + case '}': case ']': + depth--; + break; + default: // Scalar value document + // TODO: We could remove trailing whitespaces + // This returns a string spanning from start of value to the beginning of the next document (excluded) + { + auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; + // normally the length would be next_index - current_index() - 1, except for the last document + size_t svlen = next_index - current_index(); + const char *start = reinterpret_cast(stream->buf) + current_index(); + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { + svlen--; + } + return std::string_view(start, svlen); + } + } + cur_struct_index++; + } -} // namespace ondemand -} // namespace icelake -} // namespace simdjson + while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': + depth++; + break; + case '}': case ']': + depth--; + break; + } + if (depth == 0) { break; } + cur_struct_index++; + } + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; +} +inline error_code document_stream::iterator::error() const noexcept { + return stream->error; +} -namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::document_reference value, error_code error) - noexcept : implementation_simdjson_result_base(std::forward(value), error) {} +#ifdef SIMDJSON_THREADS_ENABLED +inline void document_stream::load_from_stage1_thread() noexcept { + worker->finish(); + // Swap to the parser that was loaded up in the thread. Make sure the parser has + // enough memory to swap to, as well. + std::swap(stage1_thread_parser,*parser); + error = stage1_thread_error; + if (error) { return; } -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { - if (error()) { return error(); } - return first.count_elements(); -} -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { - if (error()) { return error(); } - return first.count_fields(); -} -simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { - if (error()) { return error(); } - return first.at(index); -} -simdjson_inline error_code simdjson_result::rewind() noexcept { - if (error()) { return error(); } - first.rewind(); - return SUCCESS; -} -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { - if (error()) { return error(); } - return first.begin(); -} -simdjson_inline simdjson_result simdjson_result::end() & noexcept { - return {}; -} -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); -} -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); -} -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { - if (error()) { return error(); } - return first[key]; -} -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { - if (error()) { return error(); } - return first[key]; -} -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); -} -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); -} -simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { - if (error()) { return error(); } - return first.get_array(); + // If there's anything left, start the stage 1 thread! + if (next_batch_start() < len) { + start_stage1_thread(); + } } -simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { - if (error()) { return error(); } - return first.get_object(); + +inline void document_stream::start_stage1_thread() noexcept { + // we call the thread on a lambda that will update + // this->stage1_thread_error + // there is only one thread that may write to this value + // TODO this is NOT exception-safe. + this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); + + worker->run(this, & this->stage1_thread_parser, _next_batch_start); } -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { - if (error()) { return error(); } - return first.get_uint64(); + +#endif // SIMDJSON_THREADS_ENABLED + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ } -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { - if (error()) { return error(); } - return first.get_uint64_in_string(); +simdjson_inline simdjson_result::simdjson_result( + icelake::ondemand::document_stream &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ } -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { - if (error()) { return error(); } - return first.get_int64(); + } -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { - if (error()) { return error(); } - return first.get_int64_in_string(); + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H +/* end file simdjson/generic/ondemand/document_stream-inl.h for icelake */ +/* including simdjson/generic/ondemand/field-inl.h for icelake: #include "simdjson/generic/ondemand/field-inl.h" */ +/* begin file simdjson/generic/ondemand/field-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +// clang 6 does not think the default constructor can be noexcept, so we make it explicit +simdjson_inline field::field() noexcept : std::pair() {} + +simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept + : std::pair(key, std::forward(value)) +{ } -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { - if (error()) { return error(); } - return first.get_double(); + +simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { + raw_json_string key; + SIMDJSON_TRY( parent_iter.field_key().get(key) ); + SIMDJSON_TRY( parent_iter.field_value() ); + return field::start(parent_iter, key); } -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { - if (error()) { return error(); } - return first.get_double_in_string(); + +simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { + return field(key, parent_iter.child()); } -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.get_string(allow_replacement); + +simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); + first.consume(); + return answer; } + template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.get_string(receiver, allow_replacement); -} -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { - if (error()) { return error(); } - return first.get_wobbly_string(); +simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { + std::string_view key; + SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); + receiver = key; + return SUCCESS; } -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { - if (error()) { return error(); } - return first.get_raw_json_string(); + +simdjson_inline raw_json_string field::key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return first; } -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { - if (error()) { return error(); } - return first.get_bool(); + + +simdjson_inline std::string_view field::key_raw_json_token() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return std::string_view(reinterpret_cast(first.buf-1), second.iter._json_iter->token.peek(-1) - first.buf + 1); } -simdjson_inline simdjson_result simdjson_result::get_value() noexcept { - if (error()) { return error(); } - return first.get_value(); + +simdjson_inline std::string_view field::escaped_key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + auto end_quote = second.iter._json_iter->token.peek(-1); + while(*end_quote != '"') end_quote--; + return std::string_view(reinterpret_cast(first.buf), end_quote - first.buf); } -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { - if (error()) { return error(); } - return first.is_null(); + +simdjson_inline value &field::value() & noexcept { + return second; } -template -simdjson_inline simdjson_result simdjson_result::get() & noexcept { - if (error()) { return error(); } - return first.get(); + +simdjson_inline value field::value() && noexcept { + return std::forward(*this).second; } -template -simdjson_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first).get(); + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + icelake::ondemand::field &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ } -template -simdjson_inline error_code simdjson_result::get(T &out) & noexcept { - if (error()) { return error(); } - return first.get(out); +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ } -template -simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + +simdjson_inline simdjson_result simdjson_result::key() noexcept { if (error()) { return error(); } - return std::forward(first).get(out); + return first.key(); } -simdjson_inline simdjson_result simdjson_result::type() noexcept { + +simdjson_inline simdjson_result simdjson_result::key_raw_json_token() noexcept { if (error()) { return error(); } - return first.type(); + return first.key_raw_json_token(); } -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + +simdjson_inline simdjson_result simdjson_result::escaped_key() noexcept { if (error()) { return error(); } - return first.is_scalar(); + return first.escaped_key(); } -simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + +simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { if (error()) { return error(); } - return first.is_string(); + return first.unescaped_key(allow_replacement); } -template <> -simdjson_inline error_code simdjson_result::get(icelake::ondemand::document_reference &out) & noexcept { + +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { if (error()) { return error(); } - out = first; - return SUCCESS; + return first.unescaped_key(receiver, allow_replacement); } -template <> -simdjson_inline error_code simdjson_result::get(icelake::ondemand::document_reference &out) && noexcept { + +simdjson_inline simdjson_result simdjson_result::value() noexcept { if (error()) { return error(); } - out = first; - return SUCCESS; + return std::move(first.value()); } -simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { - if (error()) { return error(); } - return first.is_negative(); + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +/* end file simdjson/generic/ondemand/field-inl.h for icelake */ +/* including simdjson/generic/ondemand/json_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/json_iterator-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept + : token(std::forward(other.token)), + parser{other.parser}, + _string_buf_loc{other._string_buf_loc}, + error{other.error}, + _depth{other._depth}, + _root{other._root}, + _streaming{other._streaming} +{ + other.parser = nullptr; } -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { - if (error()) { return error(); } - return first.is_integer(); +simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { + token = other.token; + parser = other.parser; + _string_buf_loc = other._string_buf_loc; + error = other.error; + _depth = other._depth; + _root = other._root; + _streaming = other._streaming; + other.parser = nullptr; + return *this; } -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { - if (error()) { return error(); } - return first.get_number_type(); + +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{false} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif } -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { - if (error()) { return error(); } - return first.get_number(); + +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{streaming} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif } -#if SIMDJSON_EXCEPTIONS -template -simdjson_inline simdjson_result::operator T() noexcept(false) { - static_assert(std::is_same::value == false, "You should not call get when T is a document"); - static_assert(std::is_same::value == false, "You should not call get when T is a document"); - if (error()) { throw simdjson_error(error()); } - return first.get(); +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + +inline void json_iterator::rewind() noexcept { + token.set_position( root_position() ); + logger::log_headers(); // We start again + _string_buf_loc = parser->string_buf.get(); + _depth = 1; } -simdjson_inline simdjson_result::operator icelake::ondemand::array() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +inline bool json_iterator::balanced() const noexcept { + token_iterator ti(token); + int32_t count{0}; + ti.set_position( root_position() ); + while(ti.peek() <= peek_last()) { + switch (*ti.return_current_and_advance()) + { + case '[': case '{': + count++; + break; + case ']': case '}': + count--; + break; + default: + break; + } + } + return count == 0; } -simdjson_inline simdjson_result::operator icelake::ondemand::object() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and parent_depth, which is a desired effect. The warning does not show up if the +// skip_child() function is not marked inline). +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { + if (depth() <= parent_depth) { return SUCCESS; } + switch (*return_current_and_advance()) { + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + + // For the first open array/object in a value, we've already incremented depth, so keep it the same + // We never stop at colon, but if we did, it wouldn't affect depth + case '[': case '{': case ':': + logger::log_start_value(*this, "skip"); + break; + // If there is a comma, we have just finished a value in an array/object, and need to get back in + case ',': + logger::log_value(*this, "skip"); + break; + // ] or } means we just finished a value and need to jump out of the array/object + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } +#if SIMDJSON_CHECK_EOF + // If there are no more tokens, the parent is incomplete. + if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + break; + case '"': + if(*peek() == ':') { + // We are at a key!!! + // This might happen if you just started an object and you skip it immediately. + // Performance note: it would be nice to get rid of this check as it is somewhat + // expensive. + // https://github.com/simdjson/simdjson/issues/1742 + logger::log_value(*this, "key"); + return_current_and_advance(); // eat up the ':' + break; // important!!! + } + simdjson_fallthrough; + // Anything else must be a scalar value + default: + // For the first scalar, we will have incremented depth already, so we decrement it here. + logger::log_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + } + + // Now that we've considered the first value, we only increment/decrement for arrays/objects + while (position() < end_position()) { + switch (*return_current_and_advance()) { + case '[': case '{': + logger::log_start_value(*this, "skip"); + _depth++; + break; + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + default: + logger::log_value(*this, "skip", ""); + break; + } + } + + return report_error(TAPE_ERROR, "not enough close braces"); } -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool json_iterator::at_root() const noexcept { + return position() == root_position(); } -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline bool json_iterator::is_single_token() const noexcept { + return parser->implementation->n_structural_indexes == 1; } -simdjson_inline simdjson_result::operator double() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline bool json_iterator::streaming() const noexcept { + return _streaming; } -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline token_position json_iterator::root_position() const noexcept { + return _root; } -simdjson_inline simdjson_result::operator icelake::ondemand::raw_json_string() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); } -simdjson_inline simdjson_result::operator bool() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline void json_iterator::assert_at_root() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument + // has side effects that will be discarded. + SIMDJSON_ASSUME( token.position() == _root ); +#endif } -simdjson_inline simdjson_result::operator icelake::ondemand::value() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { + assert_valid_position(token._position + required_tokens - 1); } -#endif -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { - if (error()) { return error(); } - return first.current_location(); +simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); + SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#else + (void)position; // Suppress unused parameter warning +#endif } -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { - if (error()) { return error(); } - return first.raw_json_token(); +simdjson_inline bool json_iterator::at_end() const noexcept { + return position() == end_position(); +} +simdjson_inline token_position json_iterator::end_position() const noexcept { + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + return &parser->implementation->structural_indexes[n_structural_indexes]; } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); +inline std::string json_iterator::to_string() const noexcept { + if( !is_alive() ) { return "dead json_iterator instance"; } + const char * current_structural = reinterpret_cast(token.peek()); + return std::string("json_iterator [ depth : ") + std::to_string(_depth) + + std::string(", structural : '") + std::string(current_structural,1) + + std::string("', offset : ") + std::to_string(token.current_offset()) + + std::string("', error : ") + error_message(error) + + std::string(" ]"); } -simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { - if (error()) { - return error(); +inline simdjson_result json_iterator::current_location() const noexcept { + if (!is_alive()) { // Unrecoverable error + if (!at_root()) { + return reinterpret_cast(token.peek(-1)); + } else { + return reinterpret_cast(token.peek()); + } } - return first.at_path(json_path); + if (at_end()) { + return OUT_OF_BOUNDS; + } + return reinterpret_cast(token.peek()); } -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H -/* end file simdjson/generic/ondemand/document-inl.h for icelake */ -/* including simdjson/generic/ondemand/document_stream-inl.h for icelake: #include "simdjson/generic/ondemand/document_stream-inl.h" */ -/* begin file simdjson/generic/ondemand/document_stream-inl.h for icelake */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -#include -#include +simdjson_inline bool json_iterator::is_alive() const noexcept { + return parser; +} -namespace simdjson { -namespace icelake { -namespace ondemand { +simdjson_inline void json_iterator::abandon() noexcept { + parser = nullptr; + _depth = 0; +} -#ifdef SIMDJSON_THREADS_ENABLED +simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif // SIMDJSON_CHECK_EOF + return token.return_current_and_advance(); +} -inline void stage1_worker::finish() { - // After calling "run" someone would call finish() to wait - // for the end of the processing. - // This function will wait until either the thread has done - // the processing or, else, the destructor has been called. - std::unique_lock lock(locking_mutex); - cond_var.wait(lock, [this]{return has_work == false;}); +simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { + // deliberately done without safety guard: + return token.peek(); } -inline stage1_worker::~stage1_worker() { - // The thread may never outlive the stage1_worker instance - // and will always be stopped/joined before the stage1_worker - // instance is gone. - stop_thread(); +simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // SIMDJSON_CHECK_EOF + return token.peek(delta); } -inline void stage1_worker::start_thread() { - std::unique_lock lock(locking_mutex); - if(thread.joinable()) { - return; // This should never happen but we never want to create more than one thread. - } - thread = std::thread([this]{ - while(true) { - std::unique_lock thread_lock(locking_mutex); - // We wait for either "run" or "stop_thread" to be called. - cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); - // If, for some reason, the stop_thread() method was called (i.e., the - // destructor of stage1_worker is called, then we want to immediately destroy - // the thread (and not do any more processing). - if(!can_work) { - break; - } - this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, - this->_next_batch_start); - this->has_work = false; - // The condition variable call should be moved after thread_lock.unlock() for performance - // reasons but thread sanitizers may report it as a data race if we do. - // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock - cond_var.notify_one(); // will notify "finish" - thread_lock.unlock(); - } - } - ); +simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // #if SIMDJSON_CHECK_EOF + return token.peek_length(delta); } +simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { + // todo: currently we require end-of-string buffering, but the following + // assert_valid_position should be turned on if/when we lift that condition. + // assert_valid_position(position); + // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF + // is ON by default, we have no choice but to disable it for real with a comment. + return token.peek(position); +} -inline void stage1_worker::stop_thread() { - std::unique_lock lock(locking_mutex); - // We have to make sure that all locks can be released. - can_work = false; - has_work = false; - cond_var.notify_all(); - lock.unlock(); - if(thread.joinable()) { - thread.join(); - } +simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_length(position); +} +simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_root_length(position); } -inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { - std::unique_lock lock(locking_mutex); - owner = ds; - _next_batch_start = next_batch_start; - stage1_thread_parser = stage1; - has_work = true; - // The condition variable call should be moved after thread_lock.unlock() for performance - // reasons but thread sanitizers may report it as a data race if we do. - // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock - cond_var.notify_one(); // will notify the thread lock that we have work - lock.unlock(); +simdjson_inline token_position json_iterator::last_position() const noexcept { + // The following line fails under some compilers... + // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); + // since it has side-effects. + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + SIMDJSON_ASSUME(n_structural_indexes > 0); + return &parser->implementation->structural_indexes[n_structural_indexes - 1]; +} +simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { + return token.peek(last_position()); } -#endif // SIMDJSON_THREADS_ENABLED +simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { + SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); + SIMDJSON_ASSUME(_depth == parent_depth + 1); + _depth = parent_depth; +} -simdjson_inline document_stream::document_stream( - ondemand::parser &_parser, - const uint8_t *_buf, - size_t _len, - size_t _batch_size, - bool _allow_comma_separated -) noexcept - : parser{&_parser}, - buf{_buf}, - len{_len}, - batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? MINIMAL_BATCH_SIZE : _batch_size}, - allow_comma_separated{_allow_comma_separated}, - error{SUCCESS} - #ifdef SIMDJSON_THREADS_ENABLED - , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change - #endif -{ -#ifdef SIMDJSON_THREADS_ENABLED - if(worker.get() == nullptr) { - error = MEMALLOC; - } -#endif +simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); + _depth = child_depth; } -simdjson_inline document_stream::document_stream() noexcept - : parser{nullptr}, - buf{nullptr}, - len{0}, - batch_size{0}, - allow_comma_separated{false}, - error{UNINITIALIZED} - #ifdef SIMDJSON_THREADS_ENABLED - , use_thread(false) - #endif -{ +simdjson_inline depth_t json_iterator::depth() const noexcept { + return _depth; } -simdjson_inline document_stream::~document_stream() noexcept -{ - #ifdef SIMDJSON_THREADS_ENABLED - worker.reset(); - #endif +simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { + return _string_buf_loc; } -inline size_t document_stream::size_in_bytes() const noexcept { - return len; +simdjson_warn_unused simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); + logger::log_error(*this, message); + error = _error; + return error; } -inline size_t document_stream::truncated_bytes() const noexcept { - if(error == CAPACITY) { return len - batch_start; } - return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; +simdjson_inline token_position json_iterator::position() const noexcept { + return token.position(); } -simdjson_inline document_stream::iterator::iterator() noexcept - : stream{nullptr}, finished{true} { +simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape(in, _string_buf_loc, allow_replacement); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else + return parser->unescape(in, _string_buf_loc, allow_replacement); +#endif } -simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept - : stream{_stream}, finished{is_end} { +simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape_wobbly(in, _string_buf_loc); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else + return parser->unescape_wobbly(in, _string_buf_loc); +#endif } -simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { - return simdjson_result(stream->doc, stream->error); +simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); +#if SIMDJSON_DEVELOPMENT_CHECKS +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); + SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); +#endif +#endif + token.set_position(position); + _depth = child_depth; } -simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { - // If there is an error, then we want the iterator - // to be finished, no matter what. (E.g., we do not - // keep generating documents with errors, or go beyond - // a document with errors.) - // - // Users do not have to call "operator*()" when they use operator++, - // so we need to end the stream in the operator++ function. - // - // Note that setting finished = true is essential otherwise - // we would enter an infinite loop. - if (stream->error) { finished = true; } - // Note that stream->error() is guarded against error conditions - // (it will immediately return if stream->error casts to false). - // In effect, this next function does nothing when (stream->error) - // is true (hence the risk of an infinite loop). - stream->next(); - // If that was the last document, we're finished. - // It is the only type of error we do not want to appear - // in operator*. - if (stream->error == EMPTY) { finished = true; } - // If we had any other kind of error (not EMPTY) then we want - // to pass it along to the operator* and we cannot mark the result - // as "finished" just yet. - return *this; +simdjson_warn_unused simdjson_inline error_code json_iterator::consume_character(char c) noexcept { + if (*peek() == c) { + return_current_and_advance(); + return SUCCESS; + } + return TAPE_ERROR; } -simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { - return finished != other.finished; +#if SIMDJSON_DEVELOPMENT_CHECKS + +simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; } -simdjson_inline document_stream::iterator document_stream::begin() noexcept { - start(); - // If there are no documents, we're finished. - return iterator(this, error == EMPTY); +simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } } -simdjson_inline document_stream::iterator document_stream::end() noexcept { - return iterator(this, true); +#endif + + +simdjson_warn_unused simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); + logger::log_error(*this, message); + return _error; } -inline void document_stream::start() noexcept { - if (error) { return; } - error = parser->allocate(batch_size); - if (error) { return; } - // Always run the first stage 1 parse immediately - batch_start = 0; - error = run_stage1(*parser, batch_start); - while(error == EMPTY) { - // In exceptional cases, we may start with an empty block - batch_start = next_batch_start(); - if (batch_start >= len) { return; } - error = run_stage1(*parser, batch_start); - } - if (error) { return; } - doc_index = batch_start; - doc = document(json_iterator(&buf[batch_start], parser)); - doc.iter._streaming = true; - #ifdef SIMDJSON_THREADS_ENABLED - if (use_thread && next_batch_start() < len) { - // Kick off the first thread on next batch if needed - error = stage1_thread_parser.allocate(batch_size); - if (error) { return; } - worker->start_thread(); - start_stage1_thread(); - if (error) { return; } +simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { + // This function is not expected to be called in performance-sensitive settings. + // Let us guard against silly cases: + if((N < max_len) || (N == 0)) { return false; } + // Copy to the buffer. + std::memcpy(tmpbuf, json, max_len); + if(N > max_len) { // We pad whatever remains with ' '. + std::memset(tmpbuf + max_len, ' ', N - max_len); } - #endif // SIMDJSON_THREADS_ENABLED + return true; } -inline void document_stream::next() noexcept { - // We always enter at once once in an error condition. - if (error) { return; } - next_document(); - if (error) { return; } - auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); - doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; +} // namespace ondemand +} // namespace icelake +} // namespace simdjson - // Check if at end of structural indexes (i.e. at end of batch) - if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { - error = EMPTY; - // Load another batch (if available) - while (error == EMPTY) { - batch_start = next_batch_start(); - if (batch_start >= len) { break; } - #ifdef SIMDJSON_THREADS_ENABLED - if(use_thread) { - load_from_stage1_thread(); - } else { - error = run_stage1(*parser, batch_start); - } - #else - error = run_stage1(*parser, batch_start); - #endif - /** - * Whenever we move to another window, we need to update all pointers to make - * it appear as if the input buffer started at the beginning of the window. - * - * Take this input: - * - * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] - * - * Say you process the following window... - * - * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' - * - * When you do so, the json_iterator has a pointer at the beginning of the memory region - * (pointing at the beginning of '{"z"...'. - * - * When you move to the window that starts at... - * - * '[7, 10, 9] [15, 11, 12, 13] ... - * - * then it is not sufficient to just run stage 1. You also need to re-anchor the - * json_iterator so that it believes we are starting at '[7, 10, 9]...'. - * - * Under the DOM front-end, this gets done automatically because the parser owns - * the pointer the data, and when you call stage1 and then stage2 on the same - * parser, then stage2 will run on the pointer acquired by stage1. - * - * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that - * we used. But json_iterator has no callback when stage1 is called on the parser. - * In fact, I think that the parser is unaware of json_iterator. - * - * - * So we need to re-anchor the json_iterator after each call to stage 1 so that - * all of the pointers are in sync. - */ - doc.iter = json_iterator(&buf[batch_start], parser); - doc.iter._streaming = true; - /** - * End of resync. - */ +namespace simdjson { - if (error) { continue; } // If the error was EMPTY, we may want to load another batch. - doc_index = batch_start; +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::json_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/json_iterator-inl.h for icelake */ +/* including simdjson/generic/ondemand/json_type-inl.h for icelake: #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* begin file simdjson/generic/ondemand/json_type-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { + switch (type) { + case json_type::array: out << "array"; break; + case json_type::object: out << "object"; break; + case json_type::number: out << "number"; break; + case json_type::string: out << "string"; break; + case json_type::boolean: out << "boolean"; break; + case json_type::null: out << "null"; break; + default: SIMDJSON_UNREACHABLE(); } - } + return out; } -inline void document_stream::next_document() noexcept { - // Go to next place where depth=0 (document depth) - error = doc.iter.skip_child(0); - if (error) { return; } - // Always set depth=1 at the start of document - doc.iter._depth = 1; - // consume comma if comma separated is allowed - if (allow_comma_separated) { doc.iter.consume_character(','); } - // Resets the string buffer at the beginning, thus invalidating the strings. - doc.iter._string_buf_loc = parser->string_buf.get(); - doc.iter._root = doc.iter.position(); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { + return out << type.value(); } +#endif -inline size_t document_stream::next_batch_start() const noexcept { - return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; + + +simdjson_inline number_type number::get_number_type() const noexcept { + return type; } -inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { - // This code only updates the structural index in the parser, it does not update any json_iterator - // instance. - size_t remaining = len - _batch_start; - if (remaining <= batch_size) { - return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); - } else { - return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); - } +simdjson_inline bool number::is_uint64() const noexcept { + return get_number_type() == number_type::unsigned_integer; } -simdjson_inline size_t document_stream::iterator::current_index() const noexcept { - return stream->doc_index; +simdjson_inline uint64_t number::get_uint64() const noexcept { + return payload.unsigned_integer; } -simdjson_inline std::string_view document_stream::iterator::source() const noexcept { - auto depth = stream->doc.iter.depth(); - auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); +simdjson_inline number::operator uint64_t() const noexcept { + return get_uint64(); +} - // If at root, process the first token to determine if scalar value - if (stream->doc.iter.at_root()) { - switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { - case '{': case '[': // Depth=1 already at start of document - break; - case '}': case ']': - depth--; - break; - default: // Scalar value document - // TODO: We could remove trailing whitespaces - // This returns a string spanning from start of value to the beginning of the next document (excluded) - { - auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; - // normally the length would be next_index - current_index() - 1, except for the last document - size_t svlen = next_index - current_index(); - const char *start = reinterpret_cast(stream->buf) + current_index(); - while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { - svlen--; - } - return std::string_view(start, svlen); - } - } - cur_struct_index++; - } +simdjson_inline bool number::is_int64() const noexcept { + return get_number_type() == number_type::signed_integer; +} - while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { - switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { - case '{': case '[': - depth++; - break; - case '}': case ']': - depth--; - break; - } - if (depth == 0) { break; } - cur_struct_index++; - } +simdjson_inline int64_t number::get_int64() const noexcept { + return payload.signed_integer; +} - return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; +simdjson_inline number::operator int64_t() const noexcept { + return get_int64(); } -inline error_code document_stream::iterator::error() const noexcept { - return stream->error; +simdjson_inline bool number::is_double() const noexcept { + return get_number_type() == number_type::floating_point_number; } -#ifdef SIMDJSON_THREADS_ENABLED +simdjson_inline double number::get_double() const noexcept { + return payload.floating_point_number; +} -inline void document_stream::load_from_stage1_thread() noexcept { - worker->finish(); - // Swap to the parser that was loaded up in the thread. Make sure the parser has - // enough memory to swap to, as well. - std::swap(stage1_thread_parser,*parser); - error = stage1_thread_error; - if (error) { return; } +simdjson_inline number::operator double() const noexcept { + return get_double(); +} - // If there's anything left, start the stage 1 thread! - if (next_batch_start() < len) { - start_stage1_thread(); +simdjson_inline double number::as_double() const noexcept { + if(is_double()) { + return payload.floating_point_number; + } + if(is_int64()) { + return double(payload.signed_integer); } + return double(payload.unsigned_integer); +} + +simdjson_inline void number::append_s64(int64_t value) noexcept { + payload.signed_integer = value; + type = number_type::signed_integer; } -inline void document_stream::start_stage1_thread() noexcept { - // we call the thread on a lambda that will update - // this->stage1_thread_error - // there is only one thread that may write to this value - // TODO this is NOT exception-safe. - this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error - size_t _next_batch_start = this->next_batch_start(); +simdjson_inline void number::append_u64(uint64_t value) noexcept { + payload.unsigned_integer = value; + type = number_type::unsigned_integer; +} - worker->run(this, & this->stage1_thread_parser, _next_batch_start); +simdjson_inline void number::append_double(double value) noexcept { + payload.floating_point_number = value; + type = number_type::floating_point_number; } -#endif // SIMDJSON_THREADS_ENABLED +simdjson_inline void number::skip_double() noexcept { + type = number_type::floating_point_number; +} } // namespace ondemand } // namespace icelake @@ -73591,588 +88193,719 @@ inline void document_stream::start_stage1_thread() noexcept { namespace simdjson { -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base(error) -{ -} -simdjson_inline simdjson_result::simdjson_result( - icelake::ondemand::document_stream &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ -} +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::json_type &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} -} +} // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H -/* end file simdjson/generic/ondemand/document_stream-inl.h for icelake */ -/* including simdjson/generic/ondemand/field-inl.h for icelake: #include "simdjson/generic/ondemand/field-inl.h" */ -/* begin file simdjson/generic/ondemand/field-inl.h for icelake */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +/* end file simdjson/generic/ondemand/json_type-inl.h for icelake */ +/* including simdjson/generic/ondemand/logger-inl.h for icelake: #include "simdjson/generic/ondemand/logger-inl.h" */ +/* begin file simdjson/generic/ondemand/logger-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#include +#include + namespace simdjson { namespace icelake { namespace ondemand { +namespace logger { -// clang 6 does not think the default constructor can be noexcept, so we make it explicit -simdjson_inline field::field() noexcept : std::pair() {} +static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; +static constexpr const int LOG_EVENT_LEN = 20; +static constexpr const int LOG_BUFFER_LEN = 30; +static constexpr const int LOG_SMALL_BUFFER_LEN = 10; +static int log_depth = 0; // Not threadsafe. Log only. -simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept - : std::pair(key, std::forward(value)) -{ +// Helper to turn unprintable or newline characters into spaces +static inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } } -simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { - raw_json_string key; - SIMDJSON_TRY( parent_iter.field_key().get(key) ); - SIMDJSON_TRY( parent_iter.field_value() ); - return field::start(parent_iter, key); +template +static inline std::string string_format(const std::string& format, const Args&... args) +{ + SIMDJSON_PUSH_DISABLE_ALL_WARNINGS + int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; + auto size = static_cast(size_s); + if (size <= 0) return std::string(); + std::unique_ptr buf(new char[size]); + std::snprintf(buf.get(), size, format.c_str(), args...); + SIMDJSON_POP_DISABLE_WARNINGS + return std::string(buf.get(), buf.get() + size - 1); } -simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { - return field(key, parent_iter.child()); +static inline log_level get_log_level_from_env() +{ + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + char *lvl = getenv("SIMDJSON_LOG_LEVEL"); + SIMDJSON_POP_DISABLE_WARNINGS + if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } + return log_level::info; } -simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. - simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); - first.consume(); - return answer; +static inline log_level log_threshold() +{ + static log_level threshold = get_log_level_from_env(); + return threshold; } -template -simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { - std::string_view key; - SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); - receiver = key; - return SUCCESS; +static inline bool should_log(log_level level) +{ + return level >= log_threshold(); } -simdjson_inline raw_json_string field::key() const noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. - return first; +inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); } - -simdjson_inline std::string_view field::key_raw_json_token() const noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. - return std::string_view(reinterpret_cast(first.buf-1), second.iter._json_iter->token.peek(-1) - first.buf + 1); +inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "", type, detail, log_level::info); } - -simdjson_inline std::string_view field::escaped_key() const noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. - auto end_quote = second.iter._json_iter->token.peek(-1); - while(*end_quote != '"') end_quote--; - return std::string_view(reinterpret_cast(first.buf), end_quote - first.buf); +inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); } -simdjson_inline value &field::value() & noexcept { - return second; +inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "+", type, detail, log_level::info); + if (LOG_ENABLED) { log_depth++; } } - -simdjson_inline value field::value() && noexcept { - return std::forward(*this).second; +inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_line(iter, "+", type, "", delta, depth_delta, log_level::info); + if (LOG_ENABLED) { log_depth++; } } -} // namespace ondemand -} // namespace icelake -} // namespace simdjson +inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + if (LOG_ENABLED) { log_depth--; } + log_line(iter, "-", type, "", delta, depth_delta, log_level::info); +} -namespace simdjson { +inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); +} +inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { + log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); +} -simdjson_inline simdjson_result::simdjson_result( - icelake::ondemand::field &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ +inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_event(iter.json_iter(), type, detail, delta, depth_delta); } -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base(error) -{ + +inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_value(iter.json_iter(), type, detail, delta, depth_delta); } -simdjson_inline simdjson_result simdjson_result::key() noexcept { - if (error()) { return error(); } - return first.key(); +inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_start_value(iter.json_iter(), type, delta, depth_delta); } -simdjson_inline simdjson_result simdjson_result::key_raw_json_token() noexcept { - if (error()) { return error(); } - return first.key_raw_json_token(); +inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_end_value(iter.json_iter(), type, delta, depth_delta); } -simdjson_inline simdjson_result simdjson_result::escaped_key() noexcept { - if (error()) { return error(); } - return first.escaped_key(); +inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_error(iter.json_iter(), error, detail, delta, depth_delta); } -simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.unescaped_key(allow_replacement); +inline void log_headers() noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(log_level::info))) { + // Technically a static variable is not thread-safe, but if you are using threads and logging... well... + static bool displayed_hint{false}; + log_depth = 0; + printf("\n"); + if (!displayed_hint) { + // We only print this helpful header once. + printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); + printf("# +array says 'this is where we were when we discovered the start array'\n"); + printf( + "# -array says 'this is where we were when we ended the array'\n"); + printf("# skip says 'this is a structural or value I am skipping'\n"); + printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); + printf("#\n"); + printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); + printf("# in addition to the depth being displayed.\n"); + printf("#\n"); + printf("# Every token in the document has a single depth determined by the tokens before it,\n"); + printf("# and is not affected by what the token actually is.\n"); + printf("#\n"); + printf("# Not all structural elements are presented as tokens in the logs.\n"); + printf("#\n"); + printf("# We never give control to the user within an empty array or an empty object.\n"); + printf("#\n"); + printf("# Inside an array, having a depth greater than the array's depth means that\n"); + printf("# we are pointing inside a value.\n"); + printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); + printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); + displayed_hint = true; + } + printf("\n"); + printf("| %-*s ", LOG_EVENT_LEN, "Event"); + printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); + printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); + // printf("| %-*s ", 5, "Next#"); + printf("| %-*s ", 5, "Depth"); + printf("| Detail "); + printf("|\n"); + + printf("|%.*s", LOG_EVENT_LEN + 2, DASHES); + printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); + printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); + // printf("|%.*s", 5+2, DASHES); + printf("|%.*s", 5 + 2, DASHES); + printf("|--------"); + printf("|\n"); + fflush(stdout); + } + } } -template -simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.unescaped_key(receiver, allow_replacement); +template +inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { + log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); } -simdjson_inline simdjson_result simdjson_result::value() noexcept { - if (error()) { return error(); } - return std::move(first.value()); +template +inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(level))) { + const int indent = depth * 2; + const auto buf = iter.token.buf; + auto msg = string_format(title, std::forward(args)...); + printf("| %*s%s%-*s ", indent, "", title_prefix, + LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); + { + // Print the current structural. + printf("| "); + // Before we begin, the index might point right before the document. + // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 + if (index < iter._root) { + printf("%*s", LOG_BUFFER_LEN, ""); + } else { + auto current_structural = &buf[*index]; + for (int i = 0; i < LOG_BUFFER_LEN; i++) { + printf("%c", printable_char(current_structural[i])); + } + } + printf(" "); + } + { + // Print the next structural. + printf("| "); + auto next_structural = &buf[*(index + 1)]; + for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { + printf("%c", printable_char(next_structural[i])); + } + printf(" "); + } + // printf("| %5u ", *(index+1)); + printf("| %5i ", depth); + printf("| %6.*s ", int(detail.size()), detail.data()); + printf("|\n"); + fflush(stdout); + } + } } +} // namespace logger +} // namespace ondemand +} // namespace icelake } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H -/* end file simdjson/generic/ondemand/field-inl.h for icelake */ -/* including simdjson/generic/ondemand/json_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/json_iterator-inl.h for icelake */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +/* end file simdjson/generic/ondemand/logger-inl.h for icelake */ +/* including simdjson/generic/ondemand/object-inl.h for icelake: #include "simdjson/generic/ondemand/object-inl.h" */ +/* begin file simdjson/generic/ondemand/object-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/jsonpathutil.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_string_builder.h" // for constevalutil::fixed_string */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace ondemand { -simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept - : token(std::forward(other.token)), - parser{other.parser}, - _string_buf_loc{other._string_buf_loc}, - error{other.error}, - _depth{other._depth}, - _root{other._root}, - _streaming{other._streaming} -{ - other.parser = nullptr; -} -simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { - token = other.token; - parser = other.parser; - _string_buf_loc = other._string_buf_loc; - error = other.error; - _depth = other._depth; - _root = other._root; - _streaming = other._streaming; - other.parser = nullptr; - return *this; +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); } - -simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept - : token(buf, &_parser->implementation->structural_indexes[0]), - parser{_parser}, - _string_buf_loc{parser->string_buf.get()}, - _depth{1}, - _root{parser->implementation->structural_indexes.get()}, - _streaming{false} - -{ - logger::log_headers(); -#if SIMDJSON_CHECK_EOF - assert_more_tokens(); -#endif +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); } - -#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept - : token(buf, &_parser->implementation->structural_indexes[0]), - parser{_parser}, - _string_buf_loc{parser->string_buf.get()}, - _depth{1}, - _root{parser->implementation->structural_indexes.get()}, - _streaming{streaming} - -{ - logger::log_headers(); -#if SIMDJSON_CHECK_EOF - assert_more_tokens(); -#endif +simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { + return find_field_unordered(key); } -#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON - -inline void json_iterator::rewind() noexcept { - token.set_position( root_position() ); - logger::log_headers(); // We start again - _string_buf_loc = parser->string_buf.get(); - _depth = 1; +simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { + return std::forward(*this).find_field_unordered(key); } - -inline bool json_iterator::balanced() const noexcept { - token_iterator ti(token); - int32_t count{0}; - ti.set_position( root_position() ); - while(ti.peek() <= peek_last()) { - switch (*ti.return_current_and_advance()) - { - case '[': case '{': - count++; - break; - case ']': case '}': - count--; - break; - default: - break; - } +simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; } - return count == 0; + return value(iter.child()); } - - -// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller -// relating depth and parent_depth, which is a desired effect. The warning does not show up if the -// skip_child() function is not marked inline). -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { - if (depth() <= parent_depth) { return SUCCESS; } - switch (*return_current_and_advance()) { - // TODO consider whether matching braces is a requirement: if non-matching braces indicates - // *missing* braces, then future lookups are not in the object/arrays they think they are, - // violating the rule "validate enough structure that the user can be confident they are - // looking at the right values." - // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth - - // For the first open array/object in a value, we've already incremented depth, so keep it the same - // We never stop at colon, but if we did, it wouldn't affect depth - case '[': case '{': case ':': - logger::log_start_value(*this, "skip"); - break; - // If there is a comma, we have just finished a value in an array/object, and need to get back in - case ',': - logger::log_value(*this, "skip"); - break; - // ] or } means we just finished a value and need to jump out of the array/object - case ']': case '}': - logger::log_end_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } -#if SIMDJSON_CHECK_EOF - // If there are no more tokens, the parent is incomplete. - if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } -#endif // SIMDJSON_CHECK_EOF - break; - case '"': - if(*peek() == ':') { - // We are at a key!!! - // This might happen if you just started an object and you skip it immediately. - // Performance note: it would be nice to get rid of this check as it is somewhat - // expensive. - // https://github.com/simdjson/simdjson/issues/1742 - logger::log_value(*this, "key"); - return_current_and_advance(); // eat up the ':' - break; // important!!! - } - simdjson_fallthrough; - // Anything else must be a scalar value - default: - // For the first scalar, we will have incremented depth already, so we decrement it here. - logger::log_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } - break; - } - - // Now that we've considered the first value, we only increment/decrement for arrays/objects - while (position() < end_position()) { - switch (*return_current_and_advance()) { - case '[': case '{': - logger::log_start_value(*this, "skip"); - _depth++; - break; - // TODO consider whether matching braces is a requirement: if non-matching braces indicates - // *missing* braces, then future lookups are not in the object/arrays they think they are, - // violating the rule "validate enough structure that the user can be confident they are - // looking at the right values." - // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth - case ']': case '}': - logger::log_end_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } - break; - default: - logger::log_value(*this, "skip", ""); - break; - } +simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; } - - return report_error(TAPE_ERROR, "not enough close braces"); + return value(iter.child()); } -SIMDJSON_POP_DISABLE_WARNINGS - -simdjson_inline bool json_iterator::at_root() const noexcept { - return position() == root_position(); +simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_object().error() ); + return object(iter); } - -simdjson_inline bool json_iterator::is_single_token() const noexcept { - return parser->implementation->n_structural_indexes == 1; +simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_root_object().error() ); + return object(iter); } - -simdjson_inline bool json_iterator::streaming() const noexcept { - return _streaming; +simdjson_warn_unused simdjson_inline error_code object::consume() noexcept { + if(iter.is_at_key()) { + /** + * whenever you are pointing at a key, calling skip_child() is + * unsafe because you will hit a string and you will assume that + * it is string value, and this mistake will lead you to make bad + * depth computation. + */ + /** + * We want to 'consume' the key. We could really + * just do _json_iter->return_current_and_advance(); at this + * point, but, for clarity, we will use the high-level API to + * eat the key. We assume that the compiler optimizes away + * most of the work. + */ + simdjson_unused raw_json_string actual_key; + auto error = iter.field_key().get(actual_key); + if (error) { iter.abandon(); return error; }; + // Let us move to the value while we are at it. + if ((error = iter.field_value())) { iter.abandon(); return error; } + } + auto error_skip = iter.json_iter().skip_child(iter.depth()-1); + if(error_skip) { iter.abandon(); } + return error_skip; } -simdjson_inline token_position json_iterator::root_position() const noexcept { - return _root; +simdjson_inline simdjson_result object::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + const uint8_t * final_point{iter._json_iter->peek()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } -simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { - SIMDJSON_ASSUME( _depth == 1 ); +simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.started_object().error() ); + return object(iter); } -simdjson_inline void json_iterator::assert_at_root() const noexcept { - SIMDJSON_ASSUME( _depth == 1 ); -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument - // has side effects that will be discarded. - SIMDJSON_ASSUME( token.position() == _root ); -#endif +simdjson_inline object object::resume(const value_iterator &iter) noexcept { + return iter; } -simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { - assert_valid_position(token._position + required_tokens - 1); +simdjson_inline object::object(const value_iterator &_iter) noexcept + : iter{_iter} +{ } -simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); - SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +simdjson_inline simdjson_result object::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } #endif + return object_iterator(iter); } - -simdjson_inline bool json_iterator::at_end() const noexcept { - return position() == end_position(); +simdjson_inline simdjson_result object::end() noexcept { + return object_iterator(iter); } -simdjson_inline token_position json_iterator::end_position() const noexcept { - uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; - return &parser->implementation->structural_indexes[n_structural_indexes]; + +inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result child; + + // If there is an escape character in the key, unescape it and then get the child. + size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = find_field(unescaped); // Take note find_field does not unescape keys when matching + } else { + child = find_field(key); + } + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; } -inline std::string json_iterator::to_string() const noexcept { - if( !is_alive() ) { return "dead json_iterator instance"; } - const char * current_structural = reinterpret_cast(token.peek()); - return std::string("json_iterator [ depth : ") + std::to_string(_depth) - + std::string(", structural : '") + std::string(current_structural,1) - + std::string("', offset : ") + std::to_string(token.current_offset()) - + std::string("', error : ") + error_message(error) - + std::string(" ]"); +inline simdjson_result object::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { + return INVALID_JSON_POINTER; + } + return at_pointer(json_pointer); } -inline simdjson_result json_iterator::current_location() const noexcept { - if (!is_alive()) { // Unrecoverable error - if (!at_root()) { - return reinterpret_cast(token.peek(-1)); +inline simdjson_result> object::at_path_with_wildcard(std::string_view json_path) noexcept { + std::vector result; + + auto result_pair = get_next_key_and_json_path(json_path); + std::string_view key = result_pair.first; + std::string_view remaining_path = result_pair.second; + // Handle when its the case for wildcard + if (key == "*") { + // Loop through each field in the object + for (auto field : *this) { + value val; + SIMDJSON_TRY(field.value().get(val)); + + if (remaining_path.empty()) { + result.push_back(std::move(val)); + } else { + auto nested_result = val.at_path_with_wildcard(remaining_path); + + if (nested_result.error()) { + return nested_result.error(); + } + // Extract and append all nested matches to our result + std::vector nested_vec; + SIMDJSON_TRY(std::move(nested_result).get(nested_vec)); + + result.insert(result.end(), + std::make_move_iterator(nested_vec.begin()), + std::make_move_iterator(nested_vec.end())); + } + } + return result; + } else { + value val; + SIMDJSON_TRY(find_field(key).get(val)); + + if (remaining_path.empty()) { + result.push_back(std::move(val)); + return result; } else { - return reinterpret_cast(token.peek()); + return val.at_path_with_wildcard(remaining_path); } } - if (at_end()) { - return OUT_OF_BOUNDS; - } - return reinterpret_cast(token.peek()); } -simdjson_inline bool json_iterator::is_alive() const noexcept { - return parser; +simdjson_inline simdjson_result object::count_fields() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the object after counting the number of elements. + iter.reset_object(); + return count; } -simdjson_inline void json_iterator::abandon() noexcept { - parser = nullptr; - _depth = 0; +simdjson_inline simdjson_result object::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_object().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; } -simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { -#if SIMDJSON_CHECK_EOF - assert_more_tokens(); -#endif // SIMDJSON_CHECK_EOF - return token.return_current_and_advance(); +simdjson_inline simdjson_result object::reset() & noexcept { + return iter.reset_object(); } -simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { - // deliberately done without safety guard: - return token.peek(); -} +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION -simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_more_tokens(delta+1); -#endif // SIMDJSON_CHECK_EOF - return token.peek(delta); -} +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code object::extract_into(T& out) & noexcept { + // Helper to check if a field name matches any of the requested fields + auto should_extract = [](std::string_view field_name) constexpr -> bool { + return ((FieldNames.view() == field_name) || ...); + }; -simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_more_tokens(delta+1); -#endif // #if SIMDJSON_CHECK_EOF - return token.peek_length(delta); -} + // Iterate through all members of T using reflection + template for (constexpr auto mem : std::define_static_array( + std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + + // Only extract this field if it's in our list of requested fields + if constexpr (should_extract(key)) { + // Try to find and extract the field + if constexpr (concepts::optional_type) { + // For optional fields, it's ok if they're missing + auto field_result = find_field_unordered(key); + if (!field_result.error()) { + auto error = field_result.get(out.[:mem:]); + if (error && error != NO_SUCH_FIELD) { + return error; + } + } else if (field_result.error() != NO_SUCH_FIELD) { + return field_result.error(); + } else { + out.[:mem:].reset(); + } + } else { + // For required fields (in the requested list), fail if missing + SIMDJSON_TRY((*this)[key].get(out.[:mem:])); + } + } + } + }; -simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { - // todo: currently we require end-of-string buffering, but the following - // assert_valid_position should be turned on if/when we lift that condition. - // assert_valid_position(position); - // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF - // is ON by default, we have no choice but to disable it for real with a comment. - return token.peek(position); + return SUCCESS; } -simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_valid_position(position); -#endif // SIMDJSON_CHECK_EOF - return token.peek_length(position); +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); } -simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_valid_position(position); -#endif // SIMDJSON_CHECK_EOF - return token.peek_root_length(position); +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); } - -simdjson_inline token_position json_iterator::last_position() const noexcept { - // The following line fails under some compilers... - // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); - // since it has side-effects. - uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; - SIMDJSON_ASSUME(n_structural_indexes > 0); - return &parser->implementation->structural_indexes[n_structural_indexes - 1]; +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); } -simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { - return token.peek(last_position()); +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field_unordered(key); } - -simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { - SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); - SIMDJSON_ASSUME(_depth == parent_depth + 1); - _depth = parent_depth; +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; } - -simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { - SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); - SIMDJSON_ASSUME(_depth == child_depth - 1); - _depth = child_depth; +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first)[key]; } - -simdjson_inline depth_t json_iterator::depth() const noexcept { - return _depth; +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field(key); } -simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { - return _string_buf_loc; +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); } -simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { - SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); - logger::log_error(*this, message); - error = _error; - return error; +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); } -simdjson_inline token_position json_iterator::position() const noexcept { - return token.position(); +simdjson_inline simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path_with_wildcard(json_path); } -simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { -#if SIMDJSON_DEVELOPMENT_CHECKS - auto result = parser->unescape(in, _string_buf_loc, allow_replacement); - SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); - return result; -#else - return parser->unescape(in, _string_buf_loc, allow_replacement); -#endif +inline simdjson_result simdjson_result::reset() noexcept { + if (error()) { return error(); } + return first.reset(); } -simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { -#if SIMDJSON_DEVELOPMENT_CHECKS - auto result = parser->unescape_wobbly(in, _string_buf_loc); - SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); - return result; -#else - return parser->unescape_wobbly(in, _string_buf_loc); -#endif +inline simdjson_result simdjson_result::is_empty() noexcept { + if (error()) { return error(); } + return first.is_empty(); } -simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { - SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); - SIMDJSON_ASSUME(_depth == child_depth - 1); -#if SIMDJSON_DEVELOPMENT_CHECKS -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); - SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); -#endif -#endif - token.set_position(position); - _depth = child_depth; +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); } -simdjson_inline error_code json_iterator::consume_character(char c) noexcept { - if (*peek() == c) { - return_current_and_advance(); - return SUCCESS; - } - return TAPE_ERROR; +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); } -#if SIMDJSON_DEVELOPMENT_CHECKS +} // namespace simdjson -simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { - SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); - return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; -} +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +/* end file simdjson/generic/ondemand/object-inl.h for icelake */ +/* including simdjson/generic/ondemand/object_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/object_iterator-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H -simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { - SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); - if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } -} +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#endif +namespace simdjson { +namespace icelake { +namespace ondemand { +// +// object_iterator +// -simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { - SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); - logger::log_error(*this, message); - return _error; +simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result object_iterator::operator*() noexcept { + error_code error = iter.error(); + if (error) { iter.abandon(); return error; } + auto result = field::start(iter); + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (result.error()) { iter.abandon(); } + return result; +} +simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { + return iter.is_open(); } +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline object_iterator &object_iterator::operator++() noexcept { + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error -simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { - // This function is not expected to be called in performance-sensitive settings. - // Let us guard against silly cases: - if((N < max_len) || (N == 0)) { return false; } - // Copy to the buffer. - std::memcpy(tmpbuf, json, max_len); - if(N > max_len) { // We pad whatever remains with ' '. - std::memset(tmpbuf + max_len, ' ', N - max_len); - } - return true; + simdjson_unused error_code error; + if ((error = iter.skip_child() )) { return *this; } + + simdjson_unused bool has_value; + if ((error = iter.has_next_field().get(has_value) )) { return *this; }; + return *this; } +SIMDJSON_POP_DISABLE_WARNINGS + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the object is first found and the iterator is just past the {. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the , or } before the next value. In this state, +// depth == iter.depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter.depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the object iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an +// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter.depth == depth, and at_start == false. +// +// Errors that occur while reading a field to give to the user (such as when the key is not a +// string or the field is missing a colon) are yielded immediately. Depth is then decremented, +// moving to the Finished state without transitioning through an Error state at all. +// +// ## Terminal State +// +// The terminal state has iter.depth < depth. at_start is always false. +// +// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. +// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. +// } // namespace ondemand } // namespace icelake @@ -74180,1516 +88913,1923 @@ simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const ui namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::json_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result( + icelake::ondemand::object_iterator &&value +) noexcept + : implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +// Checks for ']' and ',' +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++first; + return *this; +} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/json_iterator-inl.h for icelake */ -/* including simdjson/generic/ondemand/json_type-inl.h for icelake: #include "simdjson/generic/ondemand/json_type-inl.h" */ -/* begin file simdjson/generic/ondemand/json_type-inl.h for icelake */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/object_iterator-inl.h for icelake */ +/* including simdjson/generic/ondemand/parser-inl.h for icelake: #include "simdjson/generic/ondemand/parser-inl.h" */ +/* begin file simdjson/generic/ondemand/parser-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace ondemand { -inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { - switch (type) { - case json_type::array: out << "array"; break; - case json_type::object: out << "object"; break; - case json_type::number: out << "number"; break; - case json_type::string: out << "string"; break; - case json_type::boolean: out << "boolean"; break; - case json_type::null: out << "null"; break; - default: SIMDJSON_UNREACHABLE(); - } - return out; +simdjson_inline parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity} { } -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { - return out << type.value(); +simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { + if (new_capacity > max_capacity()) { return CAPACITY; } + if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + + // string_capacity copied from document::allocate + _capacity = 0; + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); +#if SIMDJSON_DEVELOPMENT_CHECKS + start_positions.reset(new (std::nothrow) token_position[new_max_depth]); +#endif + if (implementation) { + SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); + SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + } else { + SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); + } + _capacity = new_capacity; + _max_depth = new_max_depth; + return SUCCESS; +} +#if SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { + return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); } #endif +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } + json.remove_utf8_bom(); -simdjson_inline number_type number::get_number_type() const noexcept { - return type; + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return document::start({ reinterpret_cast(json.data()), this }); } -simdjson_inline bool number::is_uint64() const noexcept { - return get_number_type() == number_type::unsigned_integer; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); + if (err) { + if (err != UNCLOSED_STRING) + return err; + } + return document::start({ reinterpret_cast(json.data()), this, true }); } +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -simdjson_inline uint64_t number::get_uint64() const noexcept { - return payload.unsigned_integer; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); } -simdjson_inline number::operator uint64_t() const noexcept { - return get_uint64(); +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); } -simdjson_inline bool number::is_int64() const noexcept { - return get_number_type() == number_type::signed_integer; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { + return iterate(padded_string_view(json, allocated)); } -simdjson_inline int64_t number::get_int64() const noexcept { - return payload.signed_integer; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { + return iterate(pad_with_reserve(json)); } -simdjson_inline number::operator int64_t() const noexcept { - return get_int64(); +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { + return iterate(padded_string_view(json)); } -simdjson_inline bool number::is_double() const noexcept { - return get_number_type() == number_type::floating_point_number; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + padded_string_view json = result.value_unsafe(); + return iterate(json); } -simdjson_inline double number::get_double() const noexcept { - return payload.floating_point_number; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + const padded_string &json = result.value_unsafe(); + return iterate(json); } -simdjson_inline number::operator double() const noexcept { - return get_double(); +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length()) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return json_iterator(reinterpret_cast(json.data()), this); } -simdjson_inline double number::as_double() const noexcept { - if(is_double()) { - return payload.floating_point_number; +inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { + buf += 3; + len -= 3; } - if(is_int64()) { - return double(payload.signed_integer); + if(allow_comma_separated && batch_size < len) { batch_size = len; } + return document_stream(*this, buf, len, batch_size, allow_comma_separated); +} + +inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. + return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(padded_string_view s, size_t batch_size, bool allow_comma_separated) noexcept { + if (!s.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(pad(s), batch_size, allow_comma_separated); +} +simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { + return _capacity; +} +simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; +} +simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { + return _max_depth; +} + +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; } - return double(payload.unsigned_integer); } -simdjson_inline void number::append_s64(int64_t value) noexcept { - payload.signed_integer = value; - type = number_type::signed_integer; +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { + uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; } -simdjson_inline void number::append_u64(uint64_t value) noexcept { - payload.unsigned_integer = value; - type = number_type::unsigned_integer; +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { + uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; } -simdjson_inline void number::append_double(double value) noexcept { - payload.floating_point_number = value; - type = number_type::floating_point_number; +simdjson_inline simdjson_warn_unused ondemand::parser& parser::get_parser() { + return *parser::get_parser_instance(); } -simdjson_inline void number::skip_double() noexcept { - type = number_type::floating_point_number; +simdjson_inline bool release_parser() { + auto &parser_instance = parser::get_threadlocal_parser_if_exists(); + if (parser_instance) { + parser_instance.reset(); + return true; + } + return false; +} + +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_parser_instance() { + std::unique_ptr& parser_instance = get_threadlocal_parser_if_exists(); + if (!parser_instance) { + parser_instance.reset(new ondemand::parser()); + } + return parser_instance; } +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_threadlocal_parser_if_exists() { + // @the-moisrex points out that this could be implemented with std::optional (C++17). + thread_local std::unique_ptr parser_instance = nullptr; + return parser_instance; +} + + } // namespace ondemand } // namespace icelake } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::json_type &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H -/* end file simdjson/generic/ondemand/json_type-inl.h for icelake */ -/* including simdjson/generic/ondemand/logger-inl.h for icelake: #include "simdjson/generic/ondemand/logger-inl.h" */ -/* begin file simdjson/generic/ondemand/logger-inl.h for icelake */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H +/* end file simdjson/generic/ondemand/parser-inl.h for icelake */ +/* including simdjson/generic/ondemand/raw_json_string-inl.h for icelake: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include -#include - namespace simdjson { + namespace icelake { namespace ondemand { -namespace logger { - -static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; -static constexpr const int LOG_EVENT_LEN = 20; -static constexpr const int LOG_BUFFER_LEN = 30; -static constexpr const int LOG_SMALL_BUFFER_LEN = 10; -static int log_depth = 0; // Not threadsafe. Log only. - -// Helper to turn unprintable or newline characters into spaces -static inline char printable_char(char c) { - if (c >= 0x20) { - return c; - } else { - return ' '; - } -} - -template -static inline std::string string_format(const std::string& format, const Args&... args) -{ - SIMDJSON_PUSH_DISABLE_ALL_WARNINGS - int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; - auto size = static_cast(size_s); - if (size <= 0) return std::string(); - std::unique_ptr buf(new char[size]); - std::snprintf(buf.get(), size, format.c_str(), args...); - SIMDJSON_POP_DISABLE_WARNINGS - return std::string(buf.get(), buf.get() + size - 1); -} - -static inline log_level get_log_level_from_env() -{ - SIMDJSON_PUSH_DISABLE_WARNINGS - SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe - char *lvl = getenv("SIMDJSON_LOG_LEVEL"); - SIMDJSON_POP_DISABLE_WARNINGS - if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } - return log_level::info; -} -static inline log_level log_threshold() -{ - static log_level threshold = get_log_level_from_env(); - return threshold; -} - -static inline bool should_log(log_level level) -{ - return level >= log_threshold(); -} +simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} -inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +simdjson_inline const char * raw_json_string::raw() const noexcept { + return reinterpret_cast(buf); } -inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { - log_line(iter, index, depth, "", type, detail, log_level::info); -} -inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +simdjson_inline char raw_json_string::operator[](size_t i) const noexcept { + return reinterpret_cast(buf)[i]; } -inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { - log_line(iter, index, depth, "+", type, detail, log_level::info); - if (LOG_ENABLED) { log_depth++; } -} -inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_line(iter, "+", type, "", delta, depth_delta, log_level::info); - if (LOG_ENABLED) { log_depth++; } +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { + size_t pos{0}; + while(pos < target.size()) { + pos = target.find('"', pos); + if(pos == std::string_view::npos) { return true; } + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } + } + pos++; + } + return true; } -inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - if (LOG_ENABLED) { log_depth--; } - log_line(iter, "-", type, "", delta, depth_delta, log_level::info); +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { + size_t pos{0}; + while(target[pos]) { + const char * result = strchr(target+pos, '"'); + if(result == nullptr) { return true; } + pos = result - target; + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } + } + pos++; + } + return true; } -inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { - log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); -} -inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { - log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); -} -inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_event(iter.json_iter(), type, detail, delta, depth_delta); +simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { + // If we are going to call memcmp, then we must know something about the length of the raw_json_string. + return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); } -inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_value(iter.json_iter(), type, detail, delta, depth_delta); +simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { + // Assumptions: does not contain unescaped quote characters("), and + // the raw content is quote terminated within a valid JSON string. + if(target.size() <= SIMDJSON_PADDING) { + return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + } + const char * r{raw()}; + size_t pos{0}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; } -inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_start_value(iter.json_iter(), type, delta, depth_delta); +simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; } -inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_end_value(iter.json_iter(), type, delta, depth_delta); -} -inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { - log_error(iter.json_iter(), error, detail, delta, depth_delta); +simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { + // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; } -inline void log_headers() noexcept { - if (LOG_ENABLED) { - if (simdjson_unlikely(should_log(log_level::info))) { - // Technically a static variable is not thread-safe, but if you are using threads and logging... well... - static bool displayed_hint{false}; - log_depth = 0; - printf("\n"); - if (!displayed_hint) { - // We only print this helpful header once. - printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); - printf("# +array says 'this is where we were when we discovered the start array'\n"); - printf( - "# -array says 'this is where we were when we ended the array'\n"); - printf("# skip says 'this is a structural or value I am skipping'\n"); - printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); - printf("#\n"); - printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); - printf("# in addition to the depth being displayed.\n"); - printf("#\n"); - printf("# Every token in the document has a single depth determined by the tokens before it,\n"); - printf("# and is not affected by what the token actually is.\n"); - printf("#\n"); - printf("# Not all structural elements are presented as tokens in the logs.\n"); - printf("#\n"); - printf("# We never give control to the user within an empty array or an empty object.\n"); - printf("#\n"); - printf("# Inside an array, having a depth greater than the array's depth means that\n"); - printf("# we are pointing inside a value.\n"); - printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); - printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); - displayed_hint = true; - } - printf("\n"); - printf("| %-*s ", LOG_EVENT_LEN, "Event"); - printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); - printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); - // printf("| %-*s ", 5, "Next#"); - printf("| %-*s ", 5, "Depth"); - printf("| Detail "); - printf("|\n"); - - printf("|%.*s", LOG_EVENT_LEN + 2, DASHES); - printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); - printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); - // printf("|%.*s", 5+2, DASHES); - printf("|%.*s", 5 + 2, DASHES); - printf("|--------"); - printf("|\n"); - fflush(stdout); +simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; } } + if(r[pos] != '"') { return false; } + return true; } -template -inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { - log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); -} - -template -inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept { - if (LOG_ENABLED) { - if (simdjson_unlikely(should_log(level))) { - const int indent = depth * 2; - const auto buf = iter.token.buf; - auto msg = string_format(title, std::forward(args)...); - printf("| %*s%s%-*s ", indent, "", title_prefix, - LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); - { - // Print the current structural. - printf("| "); - // Before we begin, the index might point right before the document. - // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 - if (index < iter._root) { - printf("%*s", LOG_BUFFER_LEN, ""); - } else { - auto current_structural = &buf[*index]; - for (int i = 0; i < LOG_BUFFER_LEN; i++) { - printf("%c", printable_char(current_structural[i])); - } - } - printf(" "); - } - { - // Print the next structural. - printf("| "); - auto next_structural = &buf[*(index + 1)]; - for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { - printf("%c", printable_char(next_structural[i])); - } - printf(" "); - } - // printf("| %5u ", *(index+1)); - printf("| %5i ", depth); - printf("| %6.*s ", int(detail.size()), detail.data()); - printf("|\n"); - fflush(stdout); +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { + return a.unsafe_is_equal(c); +} + +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { + return a == c; +} + +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { + return !(a == c); +} + +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { + return !(a == c); +} + + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { + return iter.unescape(*this, allow_replacement); +} + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { + return iter.unescape_wobbly(*this); +} + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { + bool in_escape = false; + const char *s = str.raw(); + while (true) { + switch (*s) { + case '\\': in_escape = !in_escape; break; + case '"': if (in_escape) { in_escape = false; } else { return out; } break; + default: if (in_escape) { in_escape = false; } } + out << *s; + s++; } } -} // namespace logger } // namespace ondemand } // namespace icelake } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H -/* end file simdjson/generic/ondemand/logger-inl.h for icelake */ -/* including simdjson/generic/ondemand/object-inl.h for icelake: #include "simdjson/generic/ondemand/object-inl.h" */ -/* begin file simdjson/generic/ondemand/object-inl.h for icelake */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::raw() const noexcept { + if (error()) { return error(); } + return first.raw(); +} +simdjson_inline char simdjson_result::operator[](size_t i) const noexcept { + if (error()) { return error(); } + return first[i]; +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(icelake::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { + if (error()) { return error(); } + return first.unescape(iter, allow_replacement); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(icelake::ondemand::json_iterator &iter) const noexcept { + if (error()) { return error(); } + return first.unescape_wobbly(iter); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H +/* end file simdjson/generic/ondemand/raw_json_string-inl.h for icelake */ +/* including simdjson/generic/ondemand/serialization-inl.h for icelake: #include "simdjson/generic/ondemand/serialization-inl.h" */ +/* begin file simdjson/generic/ondemand/serialization-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_builder.h" */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace icelake { -namespace ondemand { -simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; - } - return value(iter.child()); -} -simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; - } - return value(iter.child()); -} -simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { - return find_field_unordered(key); +inline std::string_view trim(const std::string_view str) noexcept { + // We can almost surely do better by rolling our own find_first_not_of function. + size_t first = str.find_first_not_of(" \t\n\r"); + // If we have the empty string (just white space), then no trimming is possible, and + // we return the empty string_view. + if (std::string_view::npos == first) { return std::string_view(); } + size_t last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, (last - first + 1)); } -simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { - return std::forward(*this).find_field_unordered(key); + + +inline simdjson_result to_json_string(icelake::ondemand::document& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; - } - return value(iter.child()); + +inline simdjson_result to_json_string(icelake::ondemand::document_reference& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; + +inline simdjson_result to_json_string(icelake::ondemand::value& x) noexcept { + /** + * If we somehow receive a value that has already been consumed, + * then the following code could be in trouble. E.g., we create + * an array as needed, but if an array was already created, then + * it could be bad. + */ + using namespace icelake::ondemand; + icelake::ondemand::json_type t; + auto error = x.type().get(t); + if(error != SUCCESS) { return error; } + switch (t) + { + case json_type::array: + { + icelake::ondemand::array array; + error = x.get_array().get(array); + if(error) { return error; } + return to_json_string(array); + } + case json_type::object: + { + icelake::ondemand::object object; + error = x.get_object().get(object); + if(error) { return error; } + return to_json_string(object); + } + default: + return trim(x.raw_json_token()); } - return value(iter.child()); } -simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { - SIMDJSON_TRY( iter.start_object().error() ); - return object(iter); +inline simdjson_result to_json_string(icelake::ondemand::object& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { - SIMDJSON_TRY( iter.start_root_object().error() ); - return object(iter); + +inline simdjson_result to_json_string(icelake::ondemand::array& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline error_code object::consume() noexcept { - if(iter.is_at_key()) { - /** - * whenever you are pointing at a key, calling skip_child() is - * unsafe because you will hit a string and you will assume that - * it is string value, and this mistake will lead you to make bad - * depth computation. - */ - /** - * We want to 'consume' the key. We could really - * just do _json_iter->return_current_and_advance(); at this - * point, but, for clarity, we will use the high-level API to - * eat the key. We assume that the compiler optimizes away - * most of the work. - */ - simdjson_unused raw_json_string actual_key; - auto error = iter.field_key().get(actual_key); - if (error) { iter.abandon(); return error; }; - // Let us move to the value while we are at it. - if ((error = iter.field_value())) { iter.abandon(); return error; } - } - auto error_skip = iter.json_iter().skip_child(iter.depth()-1); - if(error_skip) { iter.abandon(); } - return error_skip; + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline simdjson_result object::raw_json() noexcept { - const uint8_t * starting_point{iter.peek_start()}; - auto error = consume(); - if(error) { return error; } - const uint8_t * final_point{iter._json_iter->peek()}; - return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { - SIMDJSON_TRY( iter.started_object().error() ); - return object(iter); +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline object object::resume(const value_iterator &iter) noexcept { - return iter; +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline object::object(const value_iterator &_iter) noexcept - : iter{_iter} -{ +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } +} // namespace simdjson -simdjson_inline simdjson_result object::begin() noexcept { -#if SIMDJSON_DEVELOPMENT_CHECKS - if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } -#endif - return object_iterator(iter); +namespace simdjson { namespace icelake { namespace ondemand { + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } } -simdjson_inline simdjson_result object::end() noexcept { - return object_iterator(iter); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } } +#endif -inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { - if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } - json_pointer = json_pointer.substr(1); - size_t slash = json_pointer.find('/'); - std::string_view key = json_pointer.substr(0, slash); - // Grab the child with the given key - simdjson_result child; +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif - // If there is an escape character in the key, unescape it and then get the child. - size_t escape = key.find('~'); - if (escape != std::string_view::npos) { - // Unescape the key - std::string unescaped(key); - do { - switch (unescaped[escape+1]) { - case '0': - unescaped.replace(escape, 2, "~"); - break; - case '1': - unescaped.replace(escape, 2, "/"); - break; - default: - return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); - } - escape = unescaped.find('~', escape+1); - } while (escape != std::string::npos); - child = find_field(unescaped); // Take note find_field does not unescape keys when matching +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); } else { - child = find_field(key); + throw simdjson::simdjson_error(error); } - if(child.error()) { - return child; // we do not continue if there was an error +} +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::document_reference& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); } - // If there is a /, we have to recurse and look up more of the path - if (slash != std::string_view::npos) { - child = child.at_pointer(json_pointer.substr(slash)); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); } - return child; } +#endif -inline simdjson_result object::at_path(std::string_view json_path) noexcept { - auto json_pointer = json_path_to_pointer_conversion(json_path); - if (json_pointer == "-1") { - return INVALID_JSON_POINTER; +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); } - return at_pointer(json_pointer); } - -simdjson_inline simdjson_result object::count_fields() & noexcept { - size_t count{0}; - // Important: we do not consume any of the values. - for(simdjson_unused auto v : *this) { count++; } - // The above loop will always succeed, but we want to report errors. - if(iter.error()) { return iter.error(); } - // We need to move back at the start because we expect users to iterate through - // the object after counting the number of elements. - iter.reset_object(); - return count; +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); } - -simdjson_inline simdjson_result object::is_empty() & noexcept { - bool is_not_empty; - auto error = iter.reset_object().get(is_not_empty); - if(error) { return error; } - return !is_not_empty; +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } } +#endif +}}} // namespace simdjson::icelake::ondemand -simdjson_inline simdjson_result object::reset() & noexcept { - return iter.reset_object(); -} +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +/* end file simdjson/generic/ondemand/serialization-inl.h for icelake */ +/* including simdjson/generic/ondemand/token_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/token_iterator-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H -} // namespace ondemand -} // namespace icelake -} // namespace simdjson +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { +namespace icelake { +namespace ondemand { -simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::object &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline token_iterator::token_iterator( + const uint8_t *_buf, + token_position position +) noexcept : buf{_buf}, _position{position} +{ +} -simdjson_inline simdjson_result simdjson_result::begin() noexcept { - if (error()) { return error(); } - return first.begin(); +simdjson_inline uint32_t token_iterator::current_offset() const noexcept { + return *(_position); } -simdjson_inline simdjson_result simdjson_result::end() noexcept { - if (error()) { return error(); } - return first.end(); + + +simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { + return &buf[*(_position++)]; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); + +simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { + return &buf[*position]; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first).find_field_unordered(key); +simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { + return *position; } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { - if (error()) { return error(); } - return first[key]; +simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { + return *(position+1) - *position; } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first)[key]; + +simdjson_inline uint32_t token_iterator::peek_root_length(token_position position) const noexcept { + return *(position+2) - *(position) > *(position+1) - *(position) ? + *(position+1) - *(position) + : *(position+2) - *(position); } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); +simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { + return &buf[*(_position+delta)]; } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first).find_field(key); +simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { + return *(_position+delta); +} +simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { + return *(_position+delta+1) - *(_position+delta); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); +simdjson_inline token_position token_iterator::position() const noexcept { + return _position; +} +simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { + _position = target_position; } -simdjson_inline simdjson_result simdjson_result::at_path( - std::string_view json_path) noexcept { - if (error()) { - return error(); - } - return first.at_path(json_path); +simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { + return _position == other._position; +} +simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { + return _position != other._position; +} +simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { + return _position > other._position; +} +simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { + return _position >= other._position; } - -inline simdjson_result simdjson_result::reset() noexcept { - if (error()) { return error(); } - return first.reset(); +simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { + return _position < other._position; } - -inline simdjson_result simdjson_result::is_empty() noexcept { - if (error()) { return error(); } - return first.is_empty(); +simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { + return _position <= other._position; } -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { - if (error()) { return error(); } - return first.count_fields(); -} +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} -simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { - if (error()) { return error(); } - return first.raw_json(); -} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H -/* end file simdjson/generic/ondemand/object-inl.h for icelake */ -/* including simdjson/generic/ondemand/object_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/object_iterator-inl.h for icelake */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/token_iterator-inl.h for icelake */ +/* including simdjson/generic/ondemand/value_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/value_iterator-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace icelake { namespace ondemand { -// -// object_iterator -// - -simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept - : iter{_iter} -{} +simdjson_inline value_iterator::value_iterator( + json_iterator *json_iter, + depth_t depth, + token_position start_position +) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} +{ +} -simdjson_inline simdjson_result object_iterator::operator*() noexcept { - error_code error = iter.error(); - if (error) { iter.abandon(); return error; } - auto result = field::start(iter); - // TODO this is a safety rail ... users should exit loops as soon as they receive an error. - // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. - if (result.error()) { iter.abandon(); } - return result; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_object(); } -simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { - return !(*this != other); + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_root_object(); } -simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { - return iter.is_open(); + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { + assert_at_container_start(); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + if (*_json_iter->peek() == '}') { + logger::log_value(*_json_iter, "empty object"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY(end_container()); + return false; + } + return true; } -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_inline object_iterator &object_iterator::operator++() noexcept { - // TODO this is a safety rail ... users should exit loops as soon as they receive an error. - // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. - if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // object: e.g., `{"a":2} foo }`. Users concerned with garbage content should + // call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != '}') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); + } + // If the last character is } *and* the first gibberish character is also '}' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed object. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} - simdjson_unused error_code error; - if ((error = iter.skip_child() )) { return *this; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { + auto error = check_root_object(); + if(error) { return error; } + return started_object(); +} - simdjson_unused bool has_value; - if ((error = iter.has_next_field().get(has_value) )) { return *this; }; - return *this; +simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { +#if SIMDJSON_CHECK_EOF + if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } + // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + _json_iter->ascend_to(depth()-1); + return SUCCESS; } -SIMDJSON_POP_DISABLE_WARNINGS -// -// ### Live States -// -// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is -// always SUCCESS: -// -// - Start: This is the state when the object is first found and the iterator is just past the {. -// In this state, at_start == true. -// - Next: After we hand a scalar value to the user, or an array/object which they then fully -// iterate over, the iterator is at the , or } before the next value. In this state, -// depth == iter.depth, at_start == false, and error == SUCCESS. -// - Unfinished Business: When we hand an array/object to the user which they do not fully -// iterate over, we need to finish that iteration by skipping child values until we reach the -// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. -// -// ## Error States -// -// In error states, we will yield exactly one more value before stopping. iter.depth == depth -// and at_start is always false. We decrement after yielding the error, moving to the Finished -// state. -// -// - Chained Error: When the object iterator is part of an error chain--for example, in -// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an -// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and -// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. -// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, -// we flag that as an error and treat it exactly the same as a Chained Error. In this state, -// error == TAPE_ERROR, iter.depth == depth, and at_start == false. -// -// Errors that occur while reading a field to give to the user (such as when the key is not a -// string or the field is missing a colon) are yielded immediately. Depth is then decremented, -// moving to the Finished state without transitioning through an Error state at all. -// -// ## Terminal State -// -// The terminal state has iter.depth < depth. at_start is always false. -// -// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. -// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. -// +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { + assert_at_next(); -} // namespace ondemand -} // namespace icelake -} // namespace simdjson + // It's illegal to call this unless there are more tokens: anything that ends in } or ] is + // obligated to verify there are more tokens if they are not the top level. + switch (*_json_iter->return_current_and_advance()) { + case '}': + logger::log_end_value(*_json_iter, "object"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between object fields"); + } +} -namespace simdjson { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { + error_code error; + bool has_value; + // + // Initially, the object can be in one of a few different places: + // + // 1. The start of the object, at the first field: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + if (at_first_field()) { + has_value = true; -simdjson_inline simdjson_result::simdjson_result( - icelake::ondemand::object_iterator &&value -) noexcept - : implementation_simdjson_result_base(std::forward(value)) -{ - first.iter.assert_is_valid(); -} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base({}, error) -{ -} + // + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + return false; -simdjson_inline simdjson_result simdjson_result::operator*() noexcept { - if (error()) { return error(); } - return *first; -} -// If we're iterating and there is an error, return the error once. -simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return !error(); } - return first == other.first; -} -// If we're iterating and there is an error, return the error once. -simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return error(); } - return first != other.first; -} -// Checks for ']' and ',' -simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { - // Clear the error if there is one, so we don't yield it twice - if (error()) { second = SUCCESS; return *this; } - ++first; - return *this; -} + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + if ((error = skip_child() )) { abandon(); return error; } + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + while (has_value) { + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + //if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } -} // namespace simdjson + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); // Skip the value entirely + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } -#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/object_iterator-inl.h for icelake */ -/* including simdjson/generic/ondemand/parser-inl.h for icelake: #include "simdjson/generic/ondemand/parser-inl.h" */ -/* begin file simdjson/generic/ondemand/parser-inl.h for icelake */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H + // If the loop ended, we're out of fields to look at. + return false; +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { + /** + * When find_field_unordered_raw is called, we can either be pointing at the + * first key, pointing outside (at the closing brace) or if a key was matched + * we can be either pointing right afterthe ':' right before the value (that we need skip), + * or we may have consumed the value and we might be at a comma or at the + * final brace (ready for a call to has_next_field()). + */ + error_code error; + bool has_value; -namespace simdjson { -namespace icelake { -namespace ondemand { + // First, we scan from that point to the end. + // If we don't find a match, we may loop back around, and scan from the beginning to that point. + token_position search_start = _json_iter->position(); -simdjson_inline parser::parser(size_t max_capacity) noexcept - : _max_capacity{max_capacity} { -} + // We want to know whether we need to go back to the beginning. + bool at_first = at_first_field(); + /////////////// + // Initially, the object can be in one of a few different places: + // + // 1. At the first key: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + // + if (at_first) { + has_value = true; -simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { - if (new_capacity > max_capacity()) { return CAPACITY; } - if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { - // string_capacity copied from document::allocate - _capacity = 0; - size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); - string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); #if SIMDJSON_DEVELOPMENT_CHECKS - start_positions.reset(new (std::nothrow) token_position[new_max_depth]); + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } #endif - if (implementation) { - SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); - SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + SIMDJSON_TRY(reset_object().get(has_value)); + at_first = true; + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // } else { - SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); - } - _capacity = new_capacity; - _max_depth = new_max_depth; - return SUCCESS; -} + // If someone queried a key but they not did access the value, then we are left pointing + // at the ':' and we need to move forward through the value... If the value was + // processed then skip_child() does not move the iterator (but may adjust the depth). + if ((error = skip_child() )) { abandon(); return error; } + search_start = _json_iter->position(); + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } #if SIMDJSON_DEVELOPMENT_CHECKS -simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { - return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); -} + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } #endif + } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + // After initial processing, we will be in one of two states: + // + // ``` + // // At the beginning of a field + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // At the end of the object + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // ``` + // + // Next, we find a match starting from the current position. + while (has_value) { + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + // Performance note: it maybe wasteful to rewind to the beginning when there might be + // no other query following. Indeed, it would require reskipping the whole object. + // Instead, you can just stay where you are. If there is a new query, there is always time + // to rewind. + if(at_first) { return false; } + + // If we reach the end without finding a match, search the rest of the fields starting at the + // beginning of the object. + // (We have already run through the object before, so we've already validated its structure. We + // don't check errors in this bit.) + SIMDJSON_TRY(reset_object().get(has_value)); + while (true) { + SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + error = field_value(); SIMDJSON_ASSUME(!error); - json.remove_utf8_bom(); + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } - // Allocate if needed - if (capacity() < json.length() || !string_buf) { - SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // If we reached the end of the key-value pair we started from, then we know + // that the key is not there so we return false. We are either right before + // the next comma or the final brace. + if(_json_iter->position() == search_start) { return false; } + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); + // If we make the mistake of exiting here, then we could be left pointing at a key + // in the middle of an object. That's not an allowable state. } - - // Run stage 1. - SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); - return document::start({ reinterpret_cast(json.data()), this }); + // If the loop ended, we're out of fields to look at. The program should + // never reach this point. + return false; } +SIMDJSON_POP_DISABLE_WARNINGS -#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { + assert_at_next(); - json.remove_utf8_bom(); + const uint8_t *key = _json_iter->return_current_and_advance(); + if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } + return raw_json_string(key); +} - // Allocate if needed - if (capacity() < json.length() || !string_buf) { - SIMDJSON_TRY( allocate(json.length(), max_depth()) ); - } +simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { + assert_at_next(); - // Run stage 1. - const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); - if (err) { - if (err != UNCLOSED_STRING) - return err; - } - return document::start({ reinterpret_cast(json.data()), this, true }); + if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } + _json_iter->descend_to(depth()+1); + return SUCCESS; } -#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { - return iterate(padded_string_view(json, len, allocated)); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_array(); } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { - return iterate(padded_string_view(json, len, allocated)); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_root_array(); } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { - return iterate(padded_string_view(json, allocated)); +inline std::string value_iterator::to_string() const noexcept { + auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); + if(_json_iter != nullptr) { answer += _json_iter->to_string(); } + answer += std::string(" ]"); + return answer; } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { - if(json.capacity() - json.size() < SIMDJSON_PADDING) { - json.reserve(json.size() + SIMDJSON_PADDING); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { + assert_at_container_start(); + if (*_json_iter->peek() == ']') { + logger::log_value(*_json_iter, "empty array"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY( end_container() ); + return false; } - return iterate(padded_string_view(json)); -} - -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { - return iterate(padded_string_view(json)); + _json_iter->descend_to(depth()+1); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + return true; } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { - // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception - SIMDJSON_TRY( result.error() ); - padded_string_view json = result.value_unsafe(); - return iterate(json); +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should + // also call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != ']') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); + } + // If the last character is ] *and* the first gibberish character is also ']' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed array. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { - // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception - SIMDJSON_TRY( result.error() ); - const padded_string &json = result.value_unsafe(); - return iterate(json); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { + auto error = check_root_array(); + if (error) { return error; } + return started_array(); } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } - - json.remove_utf8_bom(); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { + assert_at_next(); - // Allocate if needed - if (capacity() < json.length()) { - SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + logger::log_event(*this, "has_next_element"); + switch (*_json_iter->return_current_and_advance()) { + case ']': + logger::log_end_value(*_json_iter, "array"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + _json_iter->descend_to(depth()+1); + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between array elements"); } +} - // Run stage 1. - SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); - return json_iterator(reinterpret_cast(json.data()), this); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { + auto not_true = atomparsing::str4ncmp(json, "true"); + auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); + bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 5 : 4]); + if (error) { return incorrect_type_error("Not a boolean"); } + return simdjson_result(!not_true); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { + bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); + // if we start with 'n', we must be a null + if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } + return is_null_string; } -inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { - if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } - if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { - buf += 3; - len -= 3; - } - if(allow_comma_separated && batch_size < len) { batch_size = len; } - return document_stream(*this, buf, len, batch_size, allow_comma_separated); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { + return get_raw_json_string().unescape(json_iter(), allow_replacement); } -inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_string(allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; } -inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { + return get_raw_json_string().unescape_wobbly(json_iter()); } -inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + advance_scalar("string"); + return raw_json_string(json+1); } - -simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { - return _capacity; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { + auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; } -simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept { - return _max_capacity; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { + auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; } -simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { - return _max_depth; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { + auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; } - -simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { - if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { - _max_capacity = max_capacity; - } else { - _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; - } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { + auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; } - -simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { - uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); - if (!end) { return STRING_ERROR; } - std::string_view result(reinterpret_cast(dst), end-dst); - dst = end; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { + auto result = numberparsing::parse_double(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } return result; } - -simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { - uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); - if (!end) { return STRING_ERROR; } - std::string_view result(reinterpret_cast(dst), end-dst); - dst = end; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { + auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } return result; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { + auto result = parse_bool(peek_non_root_scalar("bool")); + if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } + return result; +} +simdjson_inline simdjson_result value_iterator::is_null() noexcept { + bool is_null_value; + SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); + if(is_null_value) { advance_non_root_scalar("null"); } + return is_null_value; +} +simdjson_inline bool value_iterator::is_negative() noexcept { + return numberparsing::is_negative(peek_non_root_scalar("numbersign")); +} +simdjson_inline bool value_iterator::is_root_negative() noexcept { + return numberparsing::is_negative(peek_root_scalar("numbersign")); +} +simdjson_inline simdjson_result value_iterator::is_integer() noexcept { + return numberparsing::is_integer(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { + return numberparsing::get_number_type(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number() noexcept { + number num; + error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); + if(error) { return error; } + return num; +} -} // namespace ondemand -} // namespace icelake -} // namespace simdjson - -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::parser &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H -/* end file simdjson/generic/ondemand/parser-inl.h for icelake */ -/* including simdjson/generic/ondemand/raw_json_string-inl.h for icelake: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ -/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for icelake */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { - -namespace icelake { -namespace ondemand { - -simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} - -simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } - +simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("is_root_integer"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + return false; // if there are more than 20 characters, it cannot be represented as an integer. + } + auto answer = numberparsing::is_integer(tmpbuf); + // If the parsing was a success, we must still check that it is + // a single scalar. Note that we parse first because of cases like '[]' where + // getting TRAILING_CONTENT is wrong. + if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } + return answer; +} -simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { - size_t pos{0}; - while(pos < target.size()) { - pos = target.find('"', pos); - if(pos == std::string_view::npos) { return true; } - if(pos != 0 && target[pos-1] != '\\') { return false; } - if(pos > 1 && target[pos-2] == '\\') { - size_t backslash_count{2}; - for(size_t i = 3; i <= pos; i++) { - if(target[pos-i] == '\\') { backslash_count++; } - else { break; } - } - if(backslash_count % 2 == 0) { return false; } +simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return number_type::big_integer; } - pos++; + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; } - return true; + auto answer = numberparsing::get_number_type(tmpbuf); + if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + return answer; } - -simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { - size_t pos{0}; - while(target[pos]) { - const char * result = strchr(target+pos, '"'); - if(result == nullptr) { return true; } - pos = result - target; - if(pos != 0 && target[pos-1] != '\\') { return false; } - if(pos > 1 && target[pos-2] == '\\') { - size_t backslash_count{2}; - for(size_t i = 3; i <= pos; i++) { - if(target[pos-i] == '\\') { backslash_count++; } - else { break; } - } - if(backslash_count % 2 == 0) { return false; } +simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + // NOTE: the current approach doesn't work for very big integer numbers containing more than 1074 digits. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return BIGINT_ERROR; } - pos++; + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; } - return true; + number num; + error_code error = numberparsing::parse_number(tmpbuf, num); + if(error) { return error; } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("number"); + return num; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { + return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_root_string(check_trailing, allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { + return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } - -simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { - // If we are going to call memcmp, then we must know something about the length of the raw_json_string. - return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + auto result = numberparsing::parse_integer(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } -simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { - // Assumptions: does not contain unescaped quote characters("), and - // the raw content is quote terminated within a valid JSON string. - if(target.size() <= SIMDJSON_PADDING) { - return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + auto result = numberparsing::parse_integer_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; } - const char * r{raw()}; - size_t pos{0}; - for(;pos < target.size();pos++) { - if(r[pos] != target[pos]) { return false; } + auto result = numberparsing::parse_double(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); } - if(r[pos] != '"') { return false; } - return true; + return result; } -simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { - const char * r{raw()}; - size_t pos{0}; - bool escaping{false}; - for(;pos < target.size();pos++) { - if(r[pos] != target[pos]) { return false; } - // if target is a compile-time constant and it is free from - // quotes, then the next part could get optimized away through - // inlining. - if((target[pos] == '"') && !escaping) { - // We have reached the end of the raw_json_string but - // the target is not done. - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; - } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; } - if(r[pos] != '"') { return false; } - return true; + auto result = numberparsing::parse_double_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; } - -simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { - // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and - // the raw content is quote terminated within a valid JSON string. - const char * r{raw()}; - size_t pos{0}; - for(;target[pos];pos++) { - if(r[pos] != target[pos]) { return false; } - } - if(r[pos] != '"') { return false; } - return true; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("bool"); + // We have a boolean if we have either "true" or "false" and the next character is either + // a structural character or whitespace. We also check that the length is correct: + // "true" and "false" are 4 and 5 characters long, respectively. + bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && + (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + return value_true; } -simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { - // Assumptions: does not contain unescaped quote characters, and - // the raw content is quote terminated within a valid JSON string. - const char * r{raw()}; - size_t pos{0}; - bool escaping{false}; - for(;target[pos];pos++) { - if(r[pos] != target[pos]) { return false; } - // if target is a compile-time constant and it is free from - // quotes, then the next part could get optimized away through - // inlining. - if((target[pos] == '"') && !escaping) { - // We have reached the end of the raw_json_string but - // the target is not done. - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; - } +simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("null"); + bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + if(result) { // we have something that looks like a null. + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("null"); + } else if (json[0] == 'n') { + return incorrect_type_error("Not a null but starts with n"); } - if(r[pos] != '"') { return false; } - return true; + return result; } -simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { - return a.unsafe_is_equal(c); -} +simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); -simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { - return a == c; + return _json_iter->skip_child(depth()); } -simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { - return !(a == c); +simdjson_inline value_iterator value_iterator::child() const noexcept { + assert_at_child(); + return { _json_iter, depth()+1, _json_iter->token.position() }; } -simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { - return !(a == c); +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is +// marked non-inline. +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline bool value_iterator::is_open() const noexcept { + return _json_iter->depth() >= depth(); } +SIMDJSON_POP_DISABLE_WARNINGS - -simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { - return iter.unescape(*this, allow_replacement); +simdjson_inline bool value_iterator::at_end() const noexcept { + return _json_iter->at_end(); } -simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { - return iter.unescape_wobbly(*this); +simdjson_inline bool value_iterator::at_start() const noexcept { + return _json_iter->token.position() == start_position(); } -simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { - bool in_escape = false; - const char *s = str.raw(); - while (true) { - switch (*s) { - case '\\': in_escape = !in_escape; break; - case '"': if (in_escape) { in_escape = false; } else { return out; } break; - default: if (in_escape) { in_escape = false; } - } - out << *s; - s++; - } +simdjson_inline bool value_iterator::at_first_field() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + return _json_iter->token.position() == start_position() + 1; } -} // namespace ondemand -} // namespace icelake -} // namespace simdjson - -namespace simdjson { +simdjson_inline void value_iterator::abandon() noexcept { + _json_iter->abandon(); +} -simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::raw_json_string &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { + return _depth; +} +simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { + return _json_iter->error; +} +simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { + return _json_iter->string_buf_loc(); +} +simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { + return *_json_iter; +} +simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { + return *_json_iter; +} -simdjson_inline simdjson_result simdjson_result::raw() const noexcept { - if (error()) { return error(); } - return first.raw(); +simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { + return _json_iter->peek(start_position()); } -simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(icelake::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { - if (error()) { return error(); } - return first.unescape(iter, allow_replacement); +simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { + return _json_iter->peek_length(start_position()); } -simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(icelake::ondemand::json_iterator &iter) const noexcept { - if (error()) { return error(); } - return first.unescape_wobbly(iter); +simdjson_inline uint32_t value_iterator::peek_root_length() const noexcept { + return _json_iter->peek_root_length(start_position()); } -} // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H -/* end file simdjson/generic/ondemand/raw_json_string-inl.h for icelake */ -/* including simdjson/generic/ondemand/serialization-inl.h for icelake: #include "simdjson/generic/ondemand/serialization-inl.h" */ -/* begin file simdjson/generic/ondemand/serialization-inl.h for icelake */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return peek_start(); } -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + return _json_iter->peek(); +} -namespace simdjson { +simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return; } -inline std::string_view trim(const std::string_view str) noexcept { - // We can almost surely do better by rolling our own find_first_not_of function. - size_t first = str.find_first_not_of(" \t\n\r"); - // If we have the empty string (just white space), then no trimming is possible, and - // we return the empty string_view. - if (std::string_view::npos == first) { return std::string_view(); } - size_t last = str.find_last_not_of(" \t\n\r"); - return str.substr(first, (last - first + 1)); + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); } +simdjson_warn_unused simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { + logger::log_start_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + const uint8_t *json; + if (!is_at_start()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + json = peek_start(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + } else { + assert_at_start(); + /** + * We should be prudent. Let us peek. If it is not the right type, we + * return an error. Only once we have determined that we have the right + * type are we allowed to advance! + */ + json = _json_iter->peek(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + _json_iter->return_current_and_advance(); + } -inline simdjson_result to_json_string(icelake::ondemand::document& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); -} -inline simdjson_result to_json_string(icelake::ondemand::document_reference& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); + return SUCCESS; } -inline simdjson_result to_json_string(icelake::ondemand::value& x) noexcept { - /** - * If we somehow receive a value that has already been consumed, - * then the following code could be in trouble. E.g., we create - * an array as needed, but if an array was already created, then - * it could be bad. - */ - using namespace icelake::ondemand; - icelake::ondemand::json_type t; - auto error = x.type().get(t); - if(error != SUCCESS) { return error; } - switch (t) - { - case json_type::array: - { - icelake::ondemand::array array; - error = x.get_array().get(array); - if(error) { return error; } - return to_json_string(array); - } - case json_type::object: - { - icelake::ondemand::object object; - error = x.get_object().get(object); - if(error) { return error; } - return to_json_string(object); - } - default: - return trim(x.raw_json_token()); - } -} -inline simdjson_result to_json_string(icelake::ondemand::object& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); -} +simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } -inline simdjson_result to_json_string(icelake::ondemand::array& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); + assert_at_root(); + return _json_iter->peek(); } +simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); + assert_at_non_root_start(); + return _json_iter->peek(); } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); -} +simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); + assert_at_root(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); } +simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); + assert_at_non_root_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); +simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { + logger::log_error(*_json_iter, start_position(), depth(), message); + return INCORRECT_TYPE; } -} // namespace simdjson -namespace simdjson { namespace icelake { namespace ondemand { - -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::value x) { - std::string_view v; - auto error = simdjson::to_json_string(x).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::value x) { - std::string_view v; - auto error = simdjson::to_json_string(x).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } +simdjson_inline bool value_iterator::is_at_start() const noexcept { + return position() == start_position(); } -#endif -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::array value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::array value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } +simdjson_inline bool value_iterator::is_at_key() const noexcept { + // Keys are at the same depth as the object. + // Note here that we could be safer and check that we are within an object, + // but we do not. + return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; } -#endif -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::document& value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::document_reference& value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::document& value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } +simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { + // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). + auto delta = position() - start_position(); + return delta == 1 || delta == 2; } -#endif -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::object value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::object value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } +inline void value_iterator::assert_at_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); } -#endif -}}} // namespace simdjson::icelake::ondemand - -#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H -/* end file simdjson/generic/ondemand/serialization-inl.h for icelake */ -/* including simdjson/generic/ondemand/token_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/token_iterator-inl.h for icelake */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { -namespace icelake { -namespace ondemand { -simdjson_inline token_iterator::token_iterator( - const uint8_t *_buf, - token_position position -) noexcept : buf{_buf}, _position{position} -{ +inline void value_iterator::assert_at_container_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); } -simdjson_inline uint32_t token_iterator::current_offset() const noexcept { - return *(_position); +inline void value_iterator::assert_at_next() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); } - -simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { - return &buf[*(_position++)]; +simdjson_inline void value_iterator::move_at_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position); } -simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { - return &buf[*position]; -} -simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { - return *position; +simdjson_inline void value_iterator::move_at_container_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position + 1); } -simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { - return *(position+1) - *position; + +simdjson_inline simdjson_result value_iterator::reset_array() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_array(); } -simdjson_inline uint32_t token_iterator::peek_root_length(token_position position) const noexcept { - return *(position+2) - *(position) > *(position+1) - *(position) ? - *(position+1) - *(position) - : *(position+2) - *(position); +simdjson_inline simdjson_result value_iterator::reset_object() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_object(); } -simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { - return &buf[*(_position+delta)]; + +inline void value_iterator::assert_at_child() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); + SIMDJSON_ASSUME( _depth > 0 ); } -simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { - return *(_position+delta); + +inline void value_iterator::assert_at_root() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth == 1 ); } -simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { - return *(_position+delta+1) - *(_position+delta); + +inline void value_iterator::assert_at_non_root_start() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth > 1 ); } -simdjson_inline token_position token_iterator::position() const noexcept { - return _position; +inline void value_iterator::assert_is_valid() const noexcept { + SIMDJSON_ASSUME( _json_iter != nullptr ); } -simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { - _position = target_position; + +simdjson_inline bool value_iterator::is_valid() const noexcept { + return _json_iter != nullptr; } -simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { - return _position == other._position; +simdjson_inline simdjson_result value_iterator::type() const noexcept { + switch (*peek_start()) { + case '{': + return json_type::object; + case '[': + return json_type::array; + case '"': + return json_type::string; + case 'n': + return json_type::null; + case 't': case 'f': + return json_type::boolean; + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return json_type::number; + default: + return json_type::unknown; + } } -simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { - return _position != other._position; + +simdjson_inline token_position value_iterator::start_position() const noexcept { + return _start_position; } -simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { - return _position > other._position; + +simdjson_inline token_position value_iterator::position() const noexcept { + return _json_iter->position(); } -simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { - return _position >= other._position; + +simdjson_inline token_position value_iterator::end_position() const noexcept { + return _json_iter->end_position(); } -simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { - return _position < other._position; + +simdjson_inline token_position value_iterator::last_position() const noexcept { + return _json_iter->last_position(); } -simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { - return _position <= other._position; + +simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { + return _json_iter->report_error(error, message); } } // namespace ondemand @@ -75698,1116 +90838,2050 @@ simdjson_inline bool token_iterator::operator<=(const token_iterator &other) con namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::token_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::value_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/token_iterator-inl.h for icelake */ -/* including simdjson/generic/ondemand/value_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/value_iterator-inl.h for icelake */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/value_iterator-inl.h for icelake */ + +// JSON builder inline definitions +/* including simdjson/generic/ondemand/json_string_builder-inl.h for icelake: #include "simdjson/generic/ondemand/json_string_builder-inl.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder-inl.h for icelake */ +/** + * This file is part of the builder API. It is temporarily in the ondemand + * directory but we will move it to a builder directory later. + */ +#include +#include +#include +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +/* + * Empirically, we have found that an inlined optimization is important for + * performance. The following macros are not ideal. We should find a better + * way to inline the code. + */ + +#if defined(__SSE2__) || defined(__x86_64__) || defined(__x86_64) || \ + (defined(_M_AMD64) || defined(_M_X64) || \ + (defined(_M_IX86_FP) && _M_IX86_FP == 2)) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#define SIMDJSON_EXPERIMENTAL_HAS_SSE2 1 +#endif +#endif + +#if defined(__aarch64__) || defined(_M_ARM64) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_NEON +#define SIMDJSON_EXPERIMENTAL_HAS_NEON 1 +#endif +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +#include +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#include +#endif + namespace simdjson { namespace icelake { -namespace ondemand { +namespace builder { + +static SIMDJSON_CONSTEXPR_LAMBDA std::array + json_quotable_character = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; -simdjson_inline value_iterator::value_iterator( - json_iterator *json_iter, - depth_t depth, - token_position start_position -) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} -{ -} +/** -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { - SIMDJSON_TRY( start_container('{', "Not an object", "object") ); - return started_object(); -} +A possible SWAR implementation of has_json_escapable_byte. It is not used +because it is slower than the current implementation. It is kept here for +reference (to show that we tried it). -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { - SIMDJSON_TRY( start_container('{', "Not an object", "object") ); - return started_root_object(); +inline bool has_json_escapable_byte(uint64_t x) { + uint64_t is_ascii = 0x8080808080808080ULL & ~x; + uint64_t xor2 = x ^ 0x0202020202020202ULL; + uint64_t lt32_or_eq34 = xor2 - 0x2121212121212121ULL; + uint64_t sub92 = x ^ 0x5C5C5C5C5C5C5C5CULL; + uint64_t eq92 = (sub92 - 0x0101010101010101ULL); + return ((lt32_or_eq34 | eq92) & is_ascii) != 0; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { - assert_at_container_start(); -#if SIMDJSON_DEVELOPMENT_CHECKS - _json_iter->set_start_position(_depth, start_position()); -#endif - if (*_json_iter->peek() == '}') { - logger::log_value(*_json_iter, "empty object"); - _json_iter->return_current_and_advance(); - end_container(); - return false; - } - return true; -} +**/ -simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { - // When in streaming mode, we cannot expect peek_last() to be the last structural element of the - // current document. It only works in the normal mode where we have indexed a single document. - // Note that adding a check for 'streaming' is not expensive since we only have at most - // one root element. - if ( ! _json_iter->streaming() ) { - // The following lines do not fully protect against garbage content within the - // object: e.g., `{"a":2} foo }`. Users concerned with garbage content should - // call `at_end()` on the document instance at the end of the processing to - // ensure that the processing has finished at the end. - // - if (*_json_iter->peek_last() != '}') { - _json_iter->abandon(); - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); - } - // If the last character is } *and* the first gibberish character is also '}' - // then on-demand could accidentally go over. So we need additional checks. - // https://github.com/simdjson/simdjson/issues/1834 - // Checking that the document is balanced requires a full scan which is potentially - // expensive, but it only happens in edge cases where the first padding character is - // a closing bracket. - if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { - _json_iter->abandon(); - // The exact error would require more work. It will typically be an unclosed object. - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); +SIMDJSON_CONSTEXPR_LAMBDA simdjson_inline bool +simple_needs_escaping(std::string_view v) { + for (char c : v) { + // a table lookup is faster than a series of comparisons + if (json_quotable_character[static_cast(c)]) { + return true; } } - return SUCCESS; + return false; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { - auto error = check_root_object(); - if(error) { return error; } - return started_object(); +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); + } + size_t i = 0; + uint8x16_t running = vdupq_n_u8(0); + uint8x16_t v34 = vdupq_n_u8(34); + uint8x16_t v92 = vdupq_n_u8(92); + + for (; i + 15 < view.size(); i += 16) { + uint8x16_t word = vld1q_u8((const uint8_t *)view.data() + i); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + if (i < view.size()) { + uint8x16_t word = + vld1q_u8((const uint8_t *)view.data() + view.length() - 16); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + return vmaxvq_u32(vreinterpretq_u32_u8(running)) != 0; +} +#elif SIMDJSON_EXPERIMENTAL_HAS_SSE2 +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); + } + size_t i = 0; + __m128i running = _mm_setzero_si128(); + for (; i + 15 < view.size(); i += 16) { + + __m128i word = + _mm_loadu_si128(reinterpret_cast(view.data() + i)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + if (i < view.size()) { + __m128i word = _mm_loadu_si128( + reinterpret_cast(view.data() + view.length() - 16)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + return _mm_movemask_epi8(running) != 0; +} +#else +simdjson_inline bool fast_needs_escaping(std::string_view view) { + return simple_needs_escaping(view); } +#endif -simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { -#if SIMDJSON_CHECK_EOF - if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } - // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } -#endif // SIMDJSON_CHECK_EOF - _json_iter->ascend_to(depth()-1); - return SUCCESS; +SIMDJSON_CONSTEXPR_LAMBDA inline size_t +find_next_json_quotable_character(const std::string_view view, + size_t location) noexcept { + + for (auto pos = view.begin() + location; pos != view.end(); ++pos) { + if (json_quotable_character[static_cast(*pos)]) { + return pos - view.begin(); + } + } + return size_t(view.size()); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { - assert_at_next(); +SIMDJSON_CONSTEXPR_LAMBDA static std::string_view control_chars[] = { + "\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", "\\u0006", + "\\u0007", "\\b", "\\t", "\\n", "\\u000b", "\\f", "\\r", + "\\u000e", "\\u000f", "\\u0010", "\\u0011", "\\u0012", "\\u0013", "\\u0014", + "\\u0015", "\\u0016", "\\u0017", "\\u0018", "\\u0019", "\\u001a", "\\u001b", + "\\u001c", "\\u001d", "\\u001e", "\\u001f"}; - // It's illegal to call this unless there are more tokens: anything that ends in } or ] is - // obligated to verify there are more tokens if they are not the top level. - switch (*_json_iter->return_current_and_advance()) { - case '}': - logger::log_end_value(*_json_iter, "object"); - SIMDJSON_TRY( end_container() ); - return false; - case ',': - return true; - default: - return report_error(TAPE_ERROR, "Missing comma between object fields"); +// All Unicode characters may be placed within the quotation marks, except for +// the characters that MUST be escaped: quotation mark, reverse solidus, and the +// control characters (U+0000 through U+001F). There are two-character sequence +// escape representations of some popular characters: +// \", \\, \b, \f, \n, \r, \t. +SIMDJSON_CONSTEXPR_LAMBDA void escape_json_char(char c, char *&out) { + if (c == '"') { + memcpy(out, "\\\"", 2); + out += 2; + } else if (c == '\\') { + memcpy(out, "\\\\", 2); + out += 2; + } else { + std::string_view v = control_chars[uint8_t(c)]; + memcpy(out, v.data(), v.size()); + out += v.size(); + } +} + +inline size_t write_string_escaped(const std::string_view input, char *out) { + size_t mysize = input.size(); + if (!fast_needs_escaping(input)) { // fast path! + memcpy(out, input.data(), input.size()); + return input.size(); + } + const char *const initout = out; + size_t location = find_next_json_quotable_character(input, 0); + memcpy(out, input.data(), location); + out += location; + escape_json_char(input[location], out); + location += 1; + while (location < mysize) { + size_t newlocation = find_next_json_quotable_character(input, location); + memcpy(out, input.data() + location, newlocation - location); + out += newlocation - location; + location = newlocation; + if (location == mysize) { + break; + } + escape_json_char(input[location], out); + location += 1; } + return out - initout; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { - error_code error; - bool has_value; - // - // Initially, the object can be in one of a few different places: - // - // 1. The start of the object, at the first field: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2, index 1) - // ``` - if (at_first_field()) { - has_value = true; +simdjson_inline string_builder::string_builder(size_t initial_capacity) + : buffer(new(std::nothrow) char[initial_capacity]), position(0), + capacity(buffer.get() != nullptr ? initial_capacity : 0), + is_valid(buffer.get() != nullptr) {} - // - // 2. When a previous search did not yield a value or the object is empty: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 0) - // { } - // ^ (depth 0, index 2) - // ``` - // - } else if (!is_open()) { -#if SIMDJSON_DEVELOPMENT_CHECKS - // If we're past the end of the object, we're being iterated out of order. - // Note: this is not perfect detection. It's possible the user is inside some other object; if so, - // this object iterator will blithely scan that object for fields. - if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } -#endif +simdjson_inline bool string_builder::capacity_check(size_t upcoming_bytes) { + // We use the convention that when is_valid is false, then the capacity and + // the position are 0. + // Most of the time, this function will return true. + if (simdjson_likely(upcoming_bytes <= capacity - position)) { + return true; + } + // check for overflow, most of the time there is no overflow + if (simdjson_likely(position + upcoming_bytes < position)) { return false; + } + // We will rarely get here. + grow_buffer((std::max)(capacity * 2, position + upcoming_bytes)); + // If the buffer allocation failed, we set is_valid to false. + return is_valid; +} - // 3. When a previous search found a field or an iterator yielded a value: - // - // ``` - // // When a field was not fully consumed (or not even touched at all) - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2) - // // When a field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // // When the last field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // ``` - // +simdjson_inline void string_builder::grow_buffer(size_t desired_capacity) { + if (!is_valid) { + return; + } + std::unique_ptr new_buffer(new (std::nothrow) char[desired_capacity]); + if (new_buffer.get() == nullptr) { + set_valid(false); + return; + } + std::memcpy(new_buffer.get(), buffer.get(), position); + buffer.swap(new_buffer); + capacity = desired_capacity; +} + +simdjson_inline void string_builder::set_valid(bool valid) noexcept { + if (!valid) { + is_valid = false; + capacity = 0; + position = 0; + buffer.reset(); } else { - if ((error = skip_child() )) { abandon(); return error; } - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } -#if SIMDJSON_DEVELOPMENT_CHECKS - if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } -#endif + is_valid = true; } - while (has_value) { - // Get the key and colon, stopping at the value. - raw_json_string actual_key; - // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes - // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. - // field_key() advances the pointer and checks that '"' is found (corresponding to a key). - // The depth is left unchanged by field_key(). - if ((error = field_key().get(actual_key) )) { abandon(); return error; }; - // field_value() will advance and check that we find a ':' separating the - // key and the value. It will also increment the depth by one. - if ((error = field_value() )) { abandon(); return error; } - // If it matches, stop and return - // We could do it this way if we wanted to allow arbitrary - // key content (including escaped quotes). - //if (actual_key.unsafe_is_equal(max_key_length, key)) { - // Instead we do the following which may trigger buffer overruns if the - // user provides an adversarial key (containing a well placed unescaped quote - // character and being longer than the number of bytes remaining in the JSON - // input). - if (actual_key.unsafe_is_equal(key)) { - logger::log_event(*this, "match", key, -2); - // If we return here, then we return while pointing at the ':' that we just checked. - return true; - } +} - // No match: skip the value and see if , or } is next - logger::log_event(*this, "no match", key, -2); - // The call to skip_child is meant to skip over the value corresponding to the key. - // After skip_child(), we are right before the next comma (',') or the final brace ('}'). - SIMDJSON_TRY( skip_child() ); // Skip the value entirely - // The has_next_field() advances the pointer and check that either ',' or '}' is found. - // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, - // then we are in error and we abort. - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +simdjson_inline size_t string_builder::size() const noexcept { + return position; +} + +simdjson_inline void string_builder::append(char c) noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = c; } +} - // If the loop ended, we're out of fields to look at. - return false; +simdjson_inline void string_builder::append_null() noexcept { + constexpr char null_literal[] = "null"; + constexpr size_t null_len = sizeof(null_literal) - 1; + if (capacity_check(null_len)) { + std::memcpy(buffer.get() + position, null_literal, null_len); + position += null_len; + } } -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { - /** - * When find_field_unordered_raw is called, we can either be pointing at the - * first key, pointing outside (at the closing brace) or if a key was matched - * we can be either pointing right afterthe ':' right before the value (that we need skip), - * or we may have consumed the value and we might be at a comma or at the - * final brace (ready for a call to has_next_field()). - */ - error_code error; - bool has_value; +simdjson_inline void string_builder::clear() noexcept { + position = 0; + // if it was invalid, we should try to repair it + if (!is_valid) { + capacity = 0; + buffer.reset(); + is_valid = true; + } +} - // First, we scan from that point to the end. - // If we don't find a match, we may loop back around, and scan from the beginning to that point. - token_position search_start = _json_iter->position(); +namespace internal { - // We want to know whether we need to go back to the beginning. - bool at_first = at_first_field(); - /////////////// - // Initially, the object can be in one of a few different places: - // - // 1. At the first key: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2, index 1) - // ``` - // - if (at_first) { - has_value = true; +template ::value>::type> +simdjson_really_inline int int_log2(number_type x) { + return 63 - leading_zeroes(uint64_t(x) | 1); +} + +simdjson_really_inline int fast_digit_count_32(uint32_t x) { + static uint64_t table[] = { + 4294967296, 8589934582, 8589934582, 8589934582, 12884901788, + 12884901788, 12884901788, 17179868184, 17179868184, 17179868184, + 21474826480, 21474826480, 21474826480, 21474826480, 25769703776, + 25769703776, 25769703776, 30063771072, 30063771072, 30063771072, + 34349738368, 34349738368, 34349738368, 34349738368, 38554705664, + 38554705664, 38554705664, 41949672960, 41949672960, 41949672960, + 42949672960, 42949672960}; + return uint32_t((x + table[int_log2(x)]) >> 32); +} + +simdjson_really_inline int fast_digit_count_64(uint64_t x) { + static uint64_t table[] = {9, + 99, + 999, + 9999, + 99999, + 999999, + 9999999, + 99999999, + 999999999, + 9999999999, + 99999999999, + 999999999999, + 9999999999999, + 99999999999999, + 999999999999999ULL, + 9999999999999999ULL, + 99999999999999999ULL, + 999999999999999999ULL, + 9999999999999999999ULL}; + int y = (19 * int_log2(x) >> 6); + y += x > table[y]; + return y + 1; +} + +template ::value>::type> +simdjson_really_inline size_t digit_count(number_type v) noexcept { + static_assert(sizeof(number_type) == 8 || sizeof(number_type) == 4 || + sizeof(number_type) == 2 || sizeof(number_type) == 1, + "We only support 8-bit, 16-bit, 32-bit and 64-bit numbers"); + SIMDJSON_IF_CONSTEXPR(sizeof(number_type) <= 4) { + return fast_digit_count_32(static_cast(v)); + } + else { + return fast_digit_count_64(static_cast(v)); + } +} +static const char decimal_table[200] = { + 0x30, 0x30, 0x30, 0x31, 0x30, 0x32, 0x30, 0x33, 0x30, 0x34, 0x30, 0x35, + 0x30, 0x36, 0x30, 0x37, 0x30, 0x38, 0x30, 0x39, 0x31, 0x30, 0x31, 0x31, + 0x31, 0x32, 0x31, 0x33, 0x31, 0x34, 0x31, 0x35, 0x31, 0x36, 0x31, 0x37, + 0x31, 0x38, 0x31, 0x39, 0x32, 0x30, 0x32, 0x31, 0x32, 0x32, 0x32, 0x33, + 0x32, 0x34, 0x32, 0x35, 0x32, 0x36, 0x32, 0x37, 0x32, 0x38, 0x32, 0x39, + 0x33, 0x30, 0x33, 0x31, 0x33, 0x32, 0x33, 0x33, 0x33, 0x34, 0x33, 0x35, + 0x33, 0x36, 0x33, 0x37, 0x33, 0x38, 0x33, 0x39, 0x34, 0x30, 0x34, 0x31, + 0x34, 0x32, 0x34, 0x33, 0x34, 0x34, 0x34, 0x35, 0x34, 0x36, 0x34, 0x37, + 0x34, 0x38, 0x34, 0x39, 0x35, 0x30, 0x35, 0x31, 0x35, 0x32, 0x35, 0x33, + 0x35, 0x34, 0x35, 0x35, 0x35, 0x36, 0x35, 0x37, 0x35, 0x38, 0x35, 0x39, + 0x36, 0x30, 0x36, 0x31, 0x36, 0x32, 0x36, 0x33, 0x36, 0x34, 0x36, 0x35, + 0x36, 0x36, 0x36, 0x37, 0x36, 0x38, 0x36, 0x39, 0x37, 0x30, 0x37, 0x31, + 0x37, 0x32, 0x37, 0x33, 0x37, 0x34, 0x37, 0x35, 0x37, 0x36, 0x37, 0x37, + 0x37, 0x38, 0x37, 0x39, 0x38, 0x30, 0x38, 0x31, 0x38, 0x32, 0x38, 0x33, + 0x38, 0x34, 0x38, 0x35, 0x38, 0x36, 0x38, 0x37, 0x38, 0x38, 0x38, 0x39, + 0x39, 0x30, 0x39, 0x31, 0x39, 0x32, 0x39, 0x33, 0x39, 0x34, 0x39, 0x35, + 0x39, 0x36, 0x39, 0x37, 0x39, 0x38, 0x39, 0x39, +}; +} // namespace internal - // 2. When a previous search did not yield a value or the object is empty: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 0) - // { } - // ^ (depth 0, index 2) - // ``` - // - } else if (!is_open()) { +template +simdjson_inline void string_builder::append(number_type v) noexcept { + static_assert(std::is_same::value || + std::is_integral::value || + std::is_floating_point::value, + "Unsupported number type"); + // If C++17 is available, we can 'if constexpr' here. + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + if (v) { + constexpr char true_literal[] = "true"; + constexpr size_t true_len = sizeof(true_literal) - 1; + if (capacity_check(true_len)) { + std::memcpy(buffer.get() + position, true_literal, true_len); + position += true_len; + } + } else { + constexpr char false_literal[] = "false"; + constexpr size_t false_len = sizeof(false_literal) - 1; + if (capacity_check(false_len)) { + std::memcpy(buffer.get() + position, false_literal, false_len); + position += false_len; + } + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_unsigned::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + unsigned_type pv = static_cast(v); + size_t dc = internal::digit_count(pv); + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100) * 2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_integral::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + bool negative = v < 0; + unsigned_type pv = static_cast(v); + if (negative) { + pv = 0 - pv; // the 0 is for Microsoft + } + size_t dc = internal::digit_count(pv); + // by always writing the minus sign, we avoid the branch. + buffer.get()[position] = '-'; + position += negative ? 1 : 0; + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100) * 2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_floating_point::value) { + constexpr size_t max_number_size = 24; + if (capacity_check(max_number_size)) { + // We could specialize for float. + char *end = simdjson::internal::to_chars(buffer.get() + position, nullptr, + double(v)); + position = end - buffer.get(); + } + } +} -#if SIMDJSON_DEVELOPMENT_CHECKS - // If we're past the end of the object, we're being iterated out of order. - // Note: this is not perfect detection. It's possible the user is inside some other object; if so, - // this object iterator will blithely scan that object for fields. - if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } -#endif - SIMDJSON_TRY(reset_object().get(has_value)); - at_first = true; - // 3. When a previous search found a field or an iterator yielded a value: - // - // ``` - // // When a field was not fully consumed (or not even touched at all) - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2) - // // When a field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // // When the last field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // ``` - // - } else { - // If someone queried a key but they not did access the value, then we are left pointing - // at the ':' and we need to move forward through the value... If the value was - // processed then skip_child() does not move the iterator (but may adjust the depth). - if ((error = skip_child() )) { abandon(); return error; } - search_start = _json_iter->position(); - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } -#if SIMDJSON_DEVELOPMENT_CHECKS - if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } -#endif +simdjson_inline void +string_builder::escape_and_append(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(6 * input.size())) { + position += write_string_escaped(input, buffer.get() + position); } +} - // After initial processing, we will be in one of two states: - // - // ``` - // // At the beginning of a field - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // // At the end of the object - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 0) - // ``` - // - // Next, we find a match starting from the current position. - while (has_value) { - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field +simdjson_inline void +string_builder::escape_and_append_with_quotes(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * input.size())) { + buffer.get()[position++] = '"'; + position += write_string_escaped(input, buffer.get() + position); + buffer.get()[position++] = '"'; + } +} - // Get the key and colon, stopping at the value. - raw_json_string actual_key; - // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes - // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. - // field_key() advances the pointer and checks that '"' is found (corresponding to a key). - // The depth is left unchanged by field_key(). - if ((error = field_key().get(actual_key) )) { abandon(); return error; }; - // field_value() will advance and check that we find a ':' separating the - // key and the value. It will also increment the depth by one. - if ((error = field_value() )) { abandon(); return error; } +simdjson_inline void +string_builder::escape_and_append_with_quotes(char input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * 1)) { + buffer.get()[position++] = '"'; + std::string_view cinput(&input, 1); + position += write_string_escaped(cinput, buffer.get() + position); + buffer.get()[position++] = '"'; + } +} - // If it matches, stop and return - // We could do it this way if we wanted to allow arbitrary - // key content (including escaped quotes). - // if (actual_key.unsafe_is_equal(max_key_length, key)) { - // Instead we do the following which may trigger buffer overruns if the - // user provides an adversarial key (containing a well placed unescaped quote - // character and being longer than the number of bytes remaining in the JSON - // input). - if (actual_key.unsafe_is_equal(key)) { - logger::log_event(*this, "match", key, -2); - // If we return here, then we return while pointing at the ':' that we just checked. - return true; - } +simdjson_inline void +string_builder::escape_and_append_with_quotes(const char *input) noexcept { + std::string_view cinput(input); + escape_and_append_with_quotes(cinput); +} +#if SIMDJSON_SUPPORTS_CONCEPTS +template +simdjson_inline void string_builder::escape_and_append_with_quotes() noexcept { + escape_and_append_with_quotes(constevalutil::string_constant::value); +} +#endif - // No match: skip the value and see if , or } is next - logger::log_event(*this, "no match", key, -2); - // The call to skip_child is meant to skip over the value corresponding to the key. - // After skip_child(), we are right before the next comma (',') or the final brace ('}'). - SIMDJSON_TRY( skip_child() ); - // The has_next_field() advances the pointer and check that either ',' or '}' is found. - // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, - // then we are in error and we abort. - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +simdjson_inline void string_builder::append_raw(const char *c) noexcept { + size_t len = std::strlen(c); + append_raw(c, len); +} + +simdjson_inline void +string_builder::append_raw(std::string_view input) noexcept { + if (capacity_check(input.size())) { + std::memcpy(buffer.get() + position, input.data(), input.size()); + position += input.size(); } - // Performance note: it maybe wasteful to rewind to the beginning when there might be - // no other query following. Indeed, it would require reskipping the whole object. - // Instead, you can just stay where you are. If there is a new query, there is always time - // to rewind. - if(at_first) { return false; } +} - // If we reach the end without finding a match, search the rest of the fields starting at the - // beginning of the object. - // (We have already run through the object before, so we've already validated its structure. We - // don't check errors in this bit.) - SIMDJSON_TRY(reset_object().get(has_value)); - while (true) { - SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field +simdjson_inline void string_builder::append_raw(const char *str, + size_t len) noexcept { + if (capacity_check(len)) { + std::memcpy(buffer.get() + position, str, len); + position += len; + } +} +#if SIMDJSON_SUPPORTS_CONCEPTS +// Support for optional types (std::optional, etc.) +template + requires(!require_custom_serialization) +simdjson_inline void string_builder::append(const T &opt) { + if (opt) { + append(*opt); + } else { + append_null(); + } +} + +template + requires(require_custom_serialization) +simdjson_inline void string_builder::append(T &&val) { + serialize(*this, std::forward(val)); +} - // Get the key and colon, stopping at the value. - raw_json_string actual_key; - // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes - // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. - // field_key() advances the pointer and checks that '"' is found (corresponding to a key). - // The depth is left unchanged by field_key(). - error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); - // field_value() will advance and check that we find a ':' separating the - // key and the value. It will also increment the depth by one. - error = field_value(); SIMDJSON_ASSUME(!error); +template + requires(std::is_convertible::value || + std::is_same::value) +simdjson_inline void string_builder::append(const T &value) { + escape_and_append_with_quotes(value); +} +#endif - // If it matches, stop and return - // We could do it this way if we wanted to allow arbitrary - // key content (including escaped quotes). - // if (actual_key.unsafe_is_equal(max_key_length, key)) { - // Instead we do the following which may trigger buffer overruns if the - // user provides an adversarial key (containing a well placed unescaped quote - // character and being longer than the number of bytes remaining in the JSON - // input). - if (actual_key.unsafe_is_equal(key)) { - logger::log_event(*this, "match", key, -2); - // If we return here, then we return while pointing at the ':' that we just checked. - return true; +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS +// Support for range-based appending (std::ranges::view, etc.) +template + requires(!std::is_convertible::value && !require_custom_serialization) +simdjson_inline void string_builder::append(const R &range) noexcept { + auto it = std::ranges::begin(range); + auto end = std::ranges::end(range); + if constexpr (concepts::is_pair>) { + start_object(); + + if (it == end) { + end_object(); + return; // Handle empty range } + // Append first item without leading comma + append_key_value(it->first, it->second); + ++it; - // No match: skip the value and see if , or } is next - logger::log_event(*this, "no match", key, -2); - // The call to skip_child is meant to skip over the value corresponding to the key. - // After skip_child(), we are right before the next comma (',') or the final brace ('}'). - SIMDJSON_TRY( skip_child() ); - // If we reached the end of the key-value pair we started from, then we know - // that the key is not there so we return false. We are either right before - // the next comma or the final brace. - if(_json_iter->position() == search_start) { return false; } - // The has_next_field() advances the pointer and check that either ',' or '}' is found. - // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, - // then we are in error and we abort. - error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); - // If we make the mistake of exiting here, then we could be left pointing at a key - // in the middle of an object. That's not an allowable state. + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append_key_value(it->first, it->second); + } + end_object(); + } else { + start_array(); + if (it == end) { + end_array(); + return; // Handle empty range + } + + // Append first item without leading comma + append(*it); + ++it; + + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append(*it); + } + end_array(); } - // If the loop ended, we're out of fields to look at. The program should - // never reach this point. - return false; } -SIMDJSON_POP_DISABLE_WARNINGS -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { - assert_at_next(); +#endif - const uint8_t *key = _json_iter->return_current_and_advance(); - if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } - return raw_json_string(key); +#if SIMDJSON_EXCEPTIONS +simdjson_inline string_builder::operator std::string() const noexcept(false) { + return std::string(operator std::string_view()); } -simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { - assert_at_next(); +simdjson_inline string_builder::operator std::string_view() const + noexcept(false) simdjson_lifetime_bound { + return view(); +} +#endif - if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } - _json_iter->descend_to(depth()+1); - return SUCCESS; +simdjson_inline simdjson_result +string_builder::view() const noexcept { + if (!is_valid) { + return simdjson::OUT_OF_CAPACITY; + } + return std::string_view(buffer.get(), position); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { - SIMDJSON_TRY( start_container('[', "Not an array", "array") ); - return started_array(); +simdjson_inline simdjson_result string_builder::c_str() noexcept { + if (capacity_check(1)) { + buffer.get()[position] = '\0'; + return buffer.get(); + } + return simdjson::OUT_OF_CAPACITY; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { - SIMDJSON_TRY( start_container('[', "Not an array", "array") ); - return started_root_array(); +simdjson_inline bool string_builder::validate_unicode() const noexcept { + return simdjson::validate_utf8(buffer.get(), position); } -inline std::string value_iterator::to_string() const noexcept { - auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); - if(_json_iter != nullptr) { answer += _json_iter->to_string(); } - answer += std::string(" ]"); - return answer; +simdjson_inline void string_builder::start_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '{'; + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { - assert_at_container_start(); - if (*_json_iter->peek() == ']') { - logger::log_value(*_json_iter, "empty array"); - _json_iter->return_current_and_advance(); - SIMDJSON_TRY( end_container() ); - return false; +simdjson_inline void string_builder::end_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '}'; } - _json_iter->descend_to(depth()+1); -#if SIMDJSON_DEVELOPMENT_CHECKS - _json_iter->set_start_position(_depth, start_position()); -#endif - return true; } -simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { - // When in streaming mode, we cannot expect peek_last() to be the last structural element of the - // current document. It only works in the normal mode where we have indexed a single document. - // Note that adding a check for 'streaming' is not expensive since we only have at most - // one root element. - if ( ! _json_iter->streaming() ) { - // The following lines do not fully protect against garbage content within the - // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should - // also call `at_end()` on the document instance at the end of the processing to - // ensure that the processing has finished at the end. - // - if (*_json_iter->peek_last() != ']') { - _json_iter->abandon(); - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); - } - // If the last character is ] *and* the first gibberish character is also ']' - // then on-demand could accidentally go over. So we need additional checks. - // https://github.com/simdjson/simdjson/issues/1834 - // Checking that the document is balanced requires a full scan which is potentially - // expensive, but it only happens in edge cases where the first padding character is - // a closing bracket. - if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { - _json_iter->abandon(); - // The exact error would require more work. It will typically be an unclosed array. - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); - } +simdjson_inline void string_builder::start_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '['; } - return SUCCESS; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { - auto error = check_root_array(); - if (error) { return error; } - return started_array(); +simdjson_inline void string_builder::end_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ']'; + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { - assert_at_next(); +simdjson_inline void string_builder::append_comma() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ','; + } +} - logger::log_event(*this, "has_next_element"); - switch (*_json_iter->return_current_and_advance()) { - case ']': - logger::log_end_value(*_json_iter, "array"); - SIMDJSON_TRY( end_container() ); - return false; - case ',': - _json_iter->descend_to(depth()+1); - return true; - default: - return report_error(TAPE_ERROR, "Missing comma between array elements"); +simdjson_inline void string_builder::append_colon() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ':'; } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { - auto not_true = atomparsing::str4ncmp(json, "true"); - auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); - bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 5 : 4]); - if (error) { return incorrect_type_error("Not a boolean"); } - return simdjson_result(!not_true); +template +simdjson_inline void +string_builder::append_key_value(key_type key, value_type value) noexcept { + static_assert(std::is_same::value || + std::is_convertible::value, + "Unsupported key type"); + escape_and_append_with_quotes(key); + append_colon(); + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + append_null(); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR( + std::is_convertible::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else { + append(value); + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { - bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); - // if we start with 'n', we must be a null - if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } - return is_null_string; + +#if SIMDJSON_SUPPORTS_CONCEPTS +template +simdjson_inline void +string_builder::append_key_value(value_type value) noexcept { + escape_and_append_with_quotes(); + append_colon(); + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + append_null(); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR( + std::is_convertible::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else { + append(value); + } } +#endif -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { - return get_raw_json_string().unescape(json_iter(), allow_replacement); +} // namespace builder +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_STRING_BUILDER_INL_H +/* end file simdjson/generic/ondemand/json_string_builder-inl.h for icelake */ +/* including simdjson/generic/ondemand/json_builder.h for icelake: #include "simdjson/generic/ondemand/json_builder.h" */ +/* begin file simdjson/generic/ondemand/json_builder.h for icelake */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/concepts.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#if SIMDJSON_STATIC_REFLECTION + +#include +#include +#include +#include +#include +#include +#include +#include +// #include // for std::define_static_string - header not available yet + +namespace simdjson { +namespace icelake { +namespace builder { + +template + requires(concepts::container_but_not_string && !require_custom_serialization) +constexpr void atom(string_builder &b, const T &t) { + auto it = t.begin(); + auto end = t.end(); + if (it == end) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, *it); + ++it; + for (; it != end; ++it) { + b.append(','); + atom(b, *it); + } + b.append(']'); } -template -simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept { - std::string_view content; - auto err = get_string(allow_replacement).get(content); - if (err) { return err; } - receiver = content; - return SUCCESS; + +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +constexpr void atom(string_builder &b, const T &t) { + b.escape_and_append_with_quotes(t); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { - return get_raw_json_string().unescape_wobbly(json_iter()); + +template + requires(!require_custom_serialization) +constexpr void atom(string_builder &b, const T &m) { + if (m.empty()) { + b.append_raw("{}"); + return; + } + b.append('{'); + bool first = true; + for (const auto& [key, value] : m) { + if (!first) { + b.append(','); + } + first = false; + // Keys must be convertible to string_view per the concept + b.escape_and_append_with_quotes(key); + b.append(':'); + atom(b, value); + } + b.append('}'); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { - auto json = peek_scalar("string"); - if (*json != '"') { return incorrect_type_error("Not a string"); } - advance_scalar("string"); - return raw_json_string(json+1); + + +template::value && !std::is_same_v>::type> +constexpr void atom(string_builder &b, const number_type t) { + b.append(t); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { - auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } - return result; + +template + requires(std::is_class_v && !concepts::container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && !require_custom_serialization) +constexpr void atom(string_builder &b, const T &t) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, t.[:dm:]); + i++; + }; + b.append('}'); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { - auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } - return result; + +// Support for optional types (std::optional, etc.) +template + requires(!require_custom_serialization) +constexpr void atom(string_builder &b, const T &opt) { + if (opt) { + atom(b, opt.value()); + } else { + b.append_raw("null"); + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { - auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } - return result; + +// Support for smart pointers (std::unique_ptr, std::shared_ptr, etc.) +template + requires(!require_custom_serialization) +constexpr void atom(string_builder &b, const T &ptr) { + if (ptr) { + atom(b, *ptr); + } else { + b.append_raw("null"); + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { - auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } - return result; + +// Support for enums - serialize as string representation using expand approach from P2996R12 +template + requires(std::is_enum_v && !require_custom_serialization) +void atom(string_builder &b, const T &e) { +#if SIMDJSON_STATIC_REFLECTION + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + constexpr auto enum_str = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(enum_val))); + if (e == [:enum_val:]) { + b.append_raw(enum_str); + return; + } + }; + // Fallback to integer if enum value not found + atom(b, static_cast>(e)); +#else + // Fallback: serialize as integer if reflection not available + atom(b, static_cast>(e)); +#endif } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { - auto result = numberparsing::parse_double(peek_non_root_scalar("double")); - if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } - return result; + +// Support for appendable containers that don't have operator[] (sets, etc.) +template + requires(!concepts::container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v && !require_custom_serialization) +constexpr void atom(string_builder &b, const T &container) { + if (container.empty()) { + b.append_raw("[]"); + return; + } + b.append('['); + bool first = true; + for (const auto& item : container) { + if (!first) { + b.append(','); + } + first = false; + atom(b, item); + } + b.append(']'); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { - auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); - if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } - return result; + +// append functions that delegate to atom functions for primitive types +template + requires(std::is_arithmetic_v && !std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { - auto result = parse_bool(peek_non_root_scalar("bool")); - if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } - return result; + +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_inline simdjson_result value_iterator::is_null() noexcept { - bool is_null_value; - SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); - if(is_null_value) { advance_non_root_scalar("null"); } - return is_null_value; + +template + requires(!require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_inline bool value_iterator::is_negative() noexcept { - return numberparsing::is_negative(peek_non_root_scalar("numbersign")); + +template + requires(!require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_inline bool value_iterator::is_root_negative() noexcept { - return numberparsing::is_negative(peek_root_scalar("numbersign")); + +template + requires(!concepts::container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v && !require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_inline simdjson_result value_iterator::is_integer() noexcept { - return numberparsing::is_integer(peek_non_root_scalar("integer")); + +template + requires(!require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +// works for struct +template + requires(std::is_class_v && !concepts::container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && !require_custom_serialization) +void append(string_builder &b, const Z &z) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^Z, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, z.[:dm:]); + i++; + }; + b.append('}'); } -simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { - return numberparsing::get_number_type(peek_non_root_scalar("integer")); + +// works for container that have begin() and end() iterators +template + requires(concepts::container_but_not_string && !require_custom_serialization) +void append(string_builder &b, const Z &z) { + auto it = z.begin(); + auto end = z.end(); + if (it == end) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, *it); + ++it; + for (; it != end; ++it) { + b.append(','); + atom(b, *it); + } + b.append(']'); } -simdjson_inline simdjson_result value_iterator::get_number() noexcept { - number num; - error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); - if(error) { return error; } - return num; + +template + requires (require_custom_serialization) +void append(string_builder &b, const Z &z) { + b.append(z); } -simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("is_root_integer"); - uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - return false; // if there are more than 20 characters, it cannot be represented as an integer. - } - auto answer = numberparsing::is_integer(tmpbuf); - // If the parsing was a success, we must still check that it is - // a single scalar. Note that we parse first because of cases like '[]' where - // getting TRAILING_CONTENT is wrong. - if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } - return answer; + +template +simdjson_warn_unused simdjson_result to_json_string(const Z &z, size_t initial_capacity = string_builder::DEFAULT_INITIAL_CAPACITY) { + string_builder b(initial_capacity); + append(b, z); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); } -simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("number"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - uint8_t tmpbuf[1074+8+1+1]; - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - if(numberparsing::check_if_integer(json, max_len)) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); - return number_type::big_integer; - } - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); - return NUMBER_ERROR; - } - auto answer = numberparsing::get_number_type(tmpbuf); - if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - return answer; +template +simdjson_warn_unused simdjson_error to_json(const Z &z, std::string &s, size_t initial_capacity = string_builder::DEFAULT_INITIAL_CAPACITY) { + string_builder b(initial_capacity); + append(b, z); + std::string_view view; + if(auto e = b.view().get(view); e) { return e; } + s.assign(view); + return SUCCESS; } -simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("number"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - // NOTE: the current approach doesn't work for very big integer numbers containing more than 1074 digits. - uint8_t tmpbuf[1074+8+1+1]; - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - if(numberparsing::check_if_integer(json, max_len)) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); - return BIGINT_ERROR; + +template +string_builder& operator<<(string_builder& b, const Z& z) { + append(b, z); + return b; +} + +// extract_from: Serialize only specific fields from a struct to JSON +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +void extract_from(string_builder &b, const T &obj) { + // Helper to check if a field name matches any of the requested fields + auto should_extract = [](std::string_view field_name) constexpr -> bool { + return ((FieldNames.view() == field_name) || ...); + }; + + b.append('{'); + bool first = true; + + // Iterate through all members of T using reflection + template for (constexpr auto mem : std::define_static_array( + std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + + if constexpr (std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + + // Only serialize this field if it's in our list of requested fields + if constexpr (should_extract(key)) { + if (!first) { + b.append(','); + } + first = false; + + // Serialize the key + constexpr auto quoted_key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(mem))); + b.append_raw(quoted_key); + b.append(':'); + + // Serialize the value + atom(b, obj.[:mem:]); + } } - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); - return NUMBER_ERROR; - } - number num; - error_code error = numberparsing::parse_number(tmpbuf, num); - if(error) { return error; } - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("number"); - return num; + }; + + b.append('}'); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { - return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); + +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_result extract_from(const T &obj, size_t initial_capacity = string_builder::DEFAULT_INITIAL_CAPACITY) { + string_builder b(initial_capacity); + extract_from(b, obj); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); } -template -simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept { - std::string_view content; - auto err = get_root_string(check_trailing, allow_replacement).get(content); - if (err) { return err; } - receiver = content; + +} // namespace builder +} // namespace icelake +// Alias the function template to 'to' in the global namespace +template +simdjson_warn_unused simdjson_result to_json(const Z &z, size_t initial_capacity = icelake::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + icelake::builder::string_builder b(initial_capacity); + icelake::builder::append(b, z); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); +} +template +simdjson_warn_unused simdjson_error to_json(const Z &z, std::string &s, size_t initial_capacity = icelake::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + icelake::builder::string_builder b(initial_capacity); + icelake::builder::append(b, z); + std::string_view view; + if(auto e = b.view().get(view); e) { return e; } + s.assign(view); return SUCCESS; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { - return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); +// Global namespace function for extract_from +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_result extract_from(const T &obj, size_t initial_capacity = icelake::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + icelake::builder::string_builder b(initial_capacity); + icelake::builder::extract_from(b, obj); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { - auto json = peek_scalar("string"); - if (*json != '"') { return incorrect_type_error("Not a string"); } - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_scalar("string"); - return raw_json_string(json+1); + +} // namespace simdjson + +#endif // SIMDJSON_STATIC_REFLECTION + +#endif +/* end file simdjson/generic/ondemand/json_builder.h for icelake */ + +// JSON path accessor (compile-time) - must be after inline definitions +/* including simdjson/generic/ondemand/compile_time_accessors.h for icelake: #include "simdjson/generic/ondemand/compile_time_accessors.h" */ +/* begin file simdjson/generic/ondemand/compile_time_accessors.h for icelake */ +/** + * Compile-time JSON Path and JSON Pointer accessors using C++26 reflection (P2996) + * + * This file validates JSON paths/pointers against struct definitions at compile time + * and generates optimized accessor code with zero runtime overhead. + * + * ## How It Works + * + * **Compile Time**: Path is parsed, validated against struct, types are checked + * **Runtime**: Direct navigation with no parsing or validation overhead + * + * Example: + * ```cpp + * struct User { std::string name; std::vector emails; }; + * + * std::string email; + * path_accessor::extract_field(doc, email); + * + * // Compile time validates: + * // 1. User has "emails" field + * // 2. "emails" is array-like + * // 3. Element type is std::string + * // 4. static_assert(^^std::string == ^^std::string) + * + * // Runtime just navigates: + * // doc.get_object().find_field("emails").get_array().at(0).get(email) + * ``` + * + * ## Key Reflection APIs + * + * - `^^Type`: Reflect operator, converts type to std::meta::info + * - `std::meta::nonstatic_data_members_of(type)`: Get all fields of a struct + * - `std::meta::identifier_of(member)`: Get field name as string_view + * - `std::meta::type_of(member)`: Get reflected type of a field + * - `std::meta::is_array_type(type)`: Check if C-style array + * - `std::meta::remove_extent(array)`: Extract element type from array + * - `std::meta::members_of(type)`: Get all members including typedefs + * - `std::meta::is_type(member)`: Check if member is a type (vs field) + * + * All operations execute at compile time in consteval contexts. + */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_COMPILE_TIME_ACCESSORS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_COMPILE_TIME_ACCESSORS_H */ +/* amalgamation skipped (editor-only): */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// Arguably, we should just check SIMDJSON_STATIC_REFLECTION since it +// is unlikely that we will have reflection support without concepts support. +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + +#include +#include +#include + +namespace simdjson { +namespace icelake { +namespace ondemand { +/*** + * JSONPath implementation for compile-time access + * RFC 9535 JSONPath: Query Expressions for JSON, https://www.rfc-editor.org/rfc/rfc9535 + */ +namespace json_path { + +// Note: value type must be fully defined before this header is included +// This is ensured by including this in amalgamated.h after value-inl.h + +using ::simdjson::icelake::ondemand::value; + +// Path step types +enum class step_type { + field, // .field_name or ["field_name"] + array_index // [index] +}; + +// Represents a single step in a JSON path +template +struct path_step { + step_type type; + char key[N]; // Field name (empty for array indices) + std::size_t index; // Array index (0 for field access) + + constexpr path_step(step_type t, const char (&k)[N], std::size_t idx = 0) + : type(t), index(idx) { + for (std::size_t i = 0; i < N; ++i) { + key[i] = k[i]; + } + } + + constexpr std::string_view key_view() const { + return {key, N - 1}; + } +}; + +// Helper to create field step +template +consteval auto make_field_step(const char (&name)[N]) { + return path_step(step_type::field, name, 0); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("uint64"); - uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; + +// Helper to create array index step +consteval auto make_index_step(std::size_t idx) { + return path_step<1>(step_type::array_index, "", idx); +} + +// Parse state for compile-time JSON path parsing +struct parse_result { + bool success; + std::size_t pos; + std::string_view error_msg; +}; + +// Compile-time JSON path parser +// Supports subset: .field, ["field"], [index], nested combinations +template +struct json_path_parser { + static constexpr std::string_view path_str = Path.view(); + + // Skip leading $ if present + static consteval std::size_t skip_root() { + if (!path_str.empty() && path_str[0] == '$') { + return 1; + } + return 0; } - auto result = numberparsing::parse_unsigned(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("uint64"); + + // Count the number of steps in the path at compile time + static consteval std::size_t count_steps() { + std::size_t count = 0; + std::size_t i = skip_root(); + + while (i < path_str.size()) { + if (path_str[i] == '.') { + // Field access: .field + ++i; + if (i >= path_str.size()) break; + + // Skip field name + while (i < path_str.size() && path_str[i] != '.' && path_str[i] != '[') { + ++i; + } + ++count; + } else if (path_str[i] == '[') { + // Array or bracket notation + ++i; + if (i >= path_str.size()) break; + + if (path_str[i] == '"' || path_str[i] == '\'') { + // Field access: ["field"] or ['field'] + char quote = path_str[i]; + ++i; + while (i < path_str.size() && path_str[i] != quote) { + ++i; + } + if (i < path_str.size()) ++i; // skip closing quote + if (i < path_str.size() && path_str[i] == ']') ++i; + } else { + // Array index: [0], [123] + while (i < path_str.size() && path_str[i] != ']') { + ++i; + } + if (i < path_str.size()) ++i; // skip ] + } + ++count; + } else { + ++i; + } + } + + return count; + } + + // Parse a field name at compile time + static consteval std::size_t parse_field_name(std::size_t start, char* out, std::size_t max_len) { + std::size_t len = 0; + std::size_t i = start; + + while (i < path_str.size() && path_str[i] != '.' && path_str[i] != '[' && len < max_len - 1) { + out[len++] = path_str[i++]; + } + out[len] = '\0'; + return i; + } + + // Parse an array index at compile time + static consteval std::pair parse_array_index(std::size_t start) { + std::size_t index = 0; + std::size_t i = start; + + while (i < path_str.size() && path_str[i] >= '0' && path_str[i] <= '9') { + index = index * 10 + (path_str[i] - '0'); + ++i; + } + + return {i, index}; + } +}; + +// Compile-time path accessor generator +template +struct path_accessor { + using value = ::simdjson::icelake::ondemand::value; + + static constexpr auto parser = json_path_parser(); + static constexpr std::size_t num_steps = parser.count_steps(); + static constexpr std::string_view path_view = Path.view(); + + // Compile-time accessor generation + // If T is a struct, validates the path at compile time + // If T is void, skips validation + template + static inline simdjson_result access(DocOrValue& doc_or_val) noexcept { + // Validate path at compile time if T is a struct + if constexpr (std::is_class_v) { + constexpr bool path_valid = validate_path(); + static_assert(path_valid, "JSON path does not match struct definition"); + } + + // Parse the path at compile time to build access steps + return access_impl(doc_or_val.get_value()); + } + + // Extract value at path directly into target with compile-time type validation + // Example: std::string name; path_accessor::extract_field(doc, name); + template + static inline error_code extract_field(DocOrValue& doc_or_val, FieldType& target) noexcept { + static_assert(std::is_class_v, "extract_field requires T to be a struct type for validation"); + + // Validate path exists in struct definition + constexpr bool path_valid = validate_path(); + static_assert(path_valid, "JSON path does not match struct definition"); + + // Get the type at the end of the path + constexpr auto final_type = get_final_type(); + + // Verify target type matches the field type + static_assert(final_type == ^^FieldType, "Target type does not match the field type at the path"); + + // All validation done at compile time - just navigate and extract + auto json_value = access_impl(doc_or_val.get_value()); + if (json_value.error()) return json_value.error(); + + return json_value.get(target); + } + +private: + // Get the final type by walking the path through the struct type + template + static consteval std::enable_if_t, std::meta::info> get_final_type() { + auto current_type = ^^T; + std::size_t i = parser.skip_root(); + + while (i < path_view.size()) { + if (path_view[i] == '.') { + // .field syntax + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != '.' && path_view[i] != '[') { + ++i; + } + + std::string_view field_name = path_view.substr(field_start, i - field_start); + + auto members = std::meta::nonstatic_data_members_of( + current_type, std::meta::access_context::unchecked() + ); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + break; + } + } + + } else if (path_view[i] == '[') { + ++i; + if (i >= path_view.size()) break; + + if (path_view[i] == '"' || path_view[i] == '\'') { + // ["field"] syntax + char quote = path_view[i]; + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != quote) { + ++i; + } + + std::string_view field_name = path_view.substr(field_start, i - field_start); + if (i < path_view.size()) ++i; // skip quote + if (i < path_view.size() && path_view[i] == ']') ++i; + + auto members = std::meta::nonstatic_data_members_of( + current_type, std::meta::access_context::unchecked() + ); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + break; + } + } + + } else { + // [index] syntax - extract element type + while (i < path_view.size() && path_view[i] >= '0' && path_view[i] <= '9') { + ++i; + } + if (i < path_view.size() && path_view[i] == ']') ++i; + + current_type = get_element_type_reflected(current_type); + } + } else { + ++i; + } + } + + return current_type; } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("uint64"); - uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; + +private: + // Walk path and extract directly into final field using compile-time reflection + template + static inline error_code extract_with_reflection(simdjson_result current, TargetType& target_ref) noexcept { + if (current.error()) return current.error(); + + // Base case: end of path - extract into target + if constexpr (PathPos >= path_view.size()) { + return current.get(target_ref); + } + // Field access: .field_name + else if constexpr (path_view[PathPos] == '.') { + constexpr auto field_info = parse_next_field(PathPos); + constexpr std::string_view field_name = std::get<0>(field_info); + constexpr std::size_t next_pos = std::get<1>(field_info); + + constexpr auto member_info = find_member_by_name(CurrentType, field_name); + static_assert(member_info != ^^void, "Field not found in struct"); + + constexpr auto member_type = std::meta::type_of(member_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + auto obj = obj_result.value_unsafe(); + auto field_value = obj.find_field_unordered(field_name); + + if constexpr (next_pos >= path_view.size()) { + return field_value.get(target_ref); + } else { + return extract_with_reflection(field_value, target_ref); + } + } + // Bracket notation: [index] or ["field"] + else if constexpr (path_view[PathPos] == '[') { + constexpr auto bracket_info = parse_bracket(PathPos); + constexpr bool is_field = std::get<0>(bracket_info); + constexpr std::size_t next_pos = std::get<2>(bracket_info); + + if constexpr (is_field) { + constexpr std::string_view field_name = std::get<1>(bracket_info); + constexpr auto member_info = find_member_by_name(CurrentType, field_name); + static_assert(member_info != ^^void, "Field not found in struct"); + constexpr auto member_type = std::meta::type_of(member_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + auto obj = obj_result.value_unsafe(); + auto field_value = obj.find_field_unordered(field_name); + + if constexpr (next_pos >= path_view.size()) { + return field_value.get(target_ref); + } else { + return extract_with_reflection(field_value, target_ref); + } + } else { + constexpr std::size_t index = std::get<3>(bracket_info); + constexpr auto elem_type = get_element_type_reflected(CurrentType); + static_assert(elem_type != ^^void, "Could not determine array element type"); + + auto arr_result = current.get_array(); + if (arr_result.error()) return arr_result.error(); + auto arr = arr_result.value_unsafe(); + auto elem_value = arr.at(index); + + if constexpr (next_pos >= path_view.size()) { + return elem_value.get(target_ref); + } else { + return extract_with_reflection(elem_value, target_ref); + } + } + } + // Skip unexpected characters and continue + else { + return extract_with_reflection(current, target_ref); + } } - auto result = numberparsing::parse_unsigned_in_string(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("uint64"); + + // Find member by name in reflected type + static consteval std::meta::info find_member_by_name(std::meta::info type_refl, std::string_view name) { + auto members = std::meta::nonstatic_data_members_of(type_refl, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::identifier_of(mem) == name) { + return mem; + } + } } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("int64"); - uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; + + // Generate compile-time accessor code by walking the path + template + static inline simdjson_result access_impl(simdjson_result current) noexcept { + if (current.error()) return current; + + if constexpr (PathPos >= path_view.size()) { + return current; + } else if constexpr (path_view[PathPos] == '.') { + constexpr auto field_info = parse_next_field(PathPos); + constexpr std::string_view field_name = std::get<0>(field_info); + constexpr std::size_t next_pos = std::get<1>(field_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + + auto obj = obj_result.value_unsafe(); + auto next_value = obj.find_field_unordered(field_name); + + return access_impl(next_value); + + } else if constexpr (path_view[PathPos] == '[') { + constexpr auto bracket_info = parse_bracket(PathPos); + constexpr bool is_field = std::get<0>(bracket_info); + constexpr std::size_t next_pos = std::get<2>(bracket_info); + + if constexpr (is_field) { + constexpr std::string_view field_name = std::get<1>(bracket_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + + auto obj = obj_result.value_unsafe(); + auto next_value = obj.find_field_unordered(field_name); + + return access_impl(next_value); + + } else { + constexpr std::size_t index = std::get<3>(bracket_info); + + auto arr_result = current.get_array(); + if (arr_result.error()) return arr_result.error(); + + auto arr = arr_result.value_unsafe(); + auto next_value = arr.at(index); + + return access_impl(next_value); + } + } else { + return access_impl(current); + } } - auto result = numberparsing::parse_integer(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("int64"); + // Parse next field name + static consteval auto parse_next_field(std::size_t start) { + std::size_t i = start + 1; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != '.' && path_view[i] != '[') { + ++i; + } + std::string_view field_name = path_view.substr(field_start, i - field_start); + return std::make_tuple(field_name, i); } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("int64"); - uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; + + // Parse bracket notation: returns (is_field, field_name, next_pos, index) + static consteval auto parse_bracket(std::size_t start) { + std::size_t i = start + 1; // skip '[' + + if (i < path_view.size() && (path_view[i] == '"' || path_view[i] == '\'')) { + // Field access + char quote = path_view[i]; + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != quote) { + ++i; + } + std::string_view field_name = path_view.substr(field_start, i - field_start); + if (i < path_view.size()) ++i; // skip closing quote + if (i < path_view.size() && path_view[i] == ']') ++i; + + return std::make_tuple(true, field_name, i, std::size_t(0)); + } else { + // Array index + std::size_t index = 0; + while (i < path_view.size() && path_view[i] >= '0' && path_view[i] <= '9') { + index = index * 10 + (path_view[i] - '0'); + ++i; + } + if (i < path_view.size() && path_view[i] == ']') ++i; + + return std::make_tuple(false, std::string_view{}, i, index); + } } - auto result = numberparsing::parse_integer_in_string(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("int64"); +public: + // Check if reflected type is array-like (C-style array or indexable container) + // Uses reflection to test: 1) std::meta::is_array_type() for C arrays + // 2) std::meta::substitute() to test concepts::indexable_container concept + static consteval bool is_array_like_reflected(std::meta::info type_reflection) { + if (std::meta::is_array_type(type_reflection)) { + return true; + } + + if (std::meta::can_substitute(^^concepts::indexable_container_v, {type_reflection})) { + return std::meta::extract(std::meta::substitute(^^concepts::indexable_container_v, {type_reflection})); + } + return false; } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("double"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); - return NUMBER_ERROR; + + // Extract element type from reflected array or container + // For C arrays: uses std::meta::remove_extent() + // For containers: finds value_type member using std::meta::members_of() + static consteval std::meta::info get_element_type_reflected(std::meta::info type_reflection) { + if (std::meta::is_array_type(type_reflection)) { + return std::meta::remove_extent(type_reflection); + } + + auto members = std::meta::members_of(type_reflection, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::is_type(mem)) { + auto name = std::meta::identifier_of(mem); + if (name == "value_type") { + return mem; + } + } + } + return ^^void; } - auto result = numberparsing::parse_double(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("double"); + +private: + // Check if type has member with given name + template + static consteval bool has_member(std::string_view member_name) { + constexpr auto members = std::meta::nonstatic_data_members_of(^^Type, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::identifier_of(mem) == member_name) { + return true; + } + } + return false; } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("double"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); - return NUMBER_ERROR; + // Get type of member by name + template + static consteval auto get_member_type(std::string_view member_name) { + constexpr auto members = std::meta::nonstatic_data_members_of(^^Type, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::identifier_of(mem) == member_name) { + return std::meta::type_of(mem); + } + } + return ^^void; } - auto result = numberparsing::parse_double_in_string(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("double"); + + // Check if non-reflected type is array-like + template + static consteval bool is_container_type() { + using BaseType = std::remove_cvref_t; + if constexpr (requires { typename BaseType::value_type; }) { + return true; + } + if constexpr (std::is_array_v) { + return true; + } + return false; } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("bool"); - // We have a boolean if we have either "true" or "false" and the next character is either - // a structural character or whitespace. We also check that the length is correct: - // "true" and "false" are 4 and 5 characters long, respectively. - bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && - (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); - bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && - (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); - if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("bool"); - return value_true; -} + // Extract element type from non-reflected container + template + using extract_element_type = std::conditional_t< + requires { typename std::remove_cvref_t::value_type; }, + typename std::remove_cvref_t::value_type, + std::conditional_t< + std::is_array_v>, + std::remove_extent_t>, + void + > + >; + + // Validate path matches struct definition + static consteval bool validate_path() { + if constexpr (!std::is_class_v) { + return true; + } -simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("null"); - bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && - (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); - if(result) { // we have something that looks like a null. - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("null"); - } else if (json[0] == 'n') { - return incorrect_type_error("Not a null but starts with n"); - } - return result; -} + auto current_type = ^^T; + std::size_t i = parser.skip_root(); -simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); + while (i < path_view.size()) { + if (path_view[i] == '.') { + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != '.' && path_view[i] != '[') { + ++i; + } - return _json_iter->skip_child(depth()); -} + std::string_view field_name = path_view.substr(field_start, i - field_start); -simdjson_inline value_iterator value_iterator::child() const noexcept { - assert_at_child(); - return { _json_iter, depth()+1, _json_iter->token.position() }; -} + bool found = false; + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); -// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller -// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is -// marked non-inline. -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_inline bool value_iterator::is_open() const noexcept { - return _json_iter->depth() >= depth(); -} -SIMDJSON_POP_DISABLE_WARNINGS + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + found = true; + break; + } + } -simdjson_inline bool value_iterator::at_end() const noexcept { - return _json_iter->at_end(); -} + if (!found) { + return false; + } -simdjson_inline bool value_iterator::at_start() const noexcept { - return _json_iter->token.position() == start_position(); -} + } else if (path_view[i] == '[') { + ++i; + if (i >= path_view.size()) return false; -simdjson_inline bool value_iterator::at_first_field() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - return _json_iter->token.position() == start_position() + 1; -} + if (path_view[i] == '"' || path_view[i] == '\'') { + char quote = path_view[i]; + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != quote) { + ++i; + } -simdjson_inline void value_iterator::abandon() noexcept { - _json_iter->abandon(); -} + std::string_view field_name = path_view.substr(field_start, i - field_start); + if (i < path_view.size()) ++i; + if (i < path_view.size() && path_view[i] == ']') ++i; -simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { - return _depth; -} -simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { - return _json_iter->error; -} -simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { - return _json_iter->string_buf_loc(); -} -simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { - return *_json_iter; -} -simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { - return *_json_iter; -} + bool found = false; + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); -simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { - return _json_iter->peek(start_position()); -} -simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { - return _json_iter->peek_length(start_position()); + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + found = true; + break; + } + } + + if (!found) { + return false; + } + + } else { + while (i < path_view.size() && path_view[i] >= '0' && path_view[i] <= '9') { + ++i; + } + + if (i < path_view.size() && path_view[i] == ']') ++i; + + if (!is_array_like_reflected(current_type)) { + return false; + } + + auto new_type = get_element_type_reflected(current_type); + + if (new_type == ^^void) { + return false; + } + + current_type = new_type; + } + } else { + ++i; + } + } + + return true; + } +}; + +// Compile-time path accessor with validation +template +inline simdjson_result<::simdjson::icelake::ondemand::value> at_path_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = path_accessor; + return accessor::access(doc_or_val); } -simdjson_inline uint32_t value_iterator::peek_root_length() const noexcept { - return _json_iter->peek_root_length(start_position()); + +// Overload without type parameter (no validation) +template +inline simdjson_result<::simdjson::icelake::ondemand::value> at_path_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = path_accessor; + return accessor::access(doc_or_val); } -simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - // If we're not at the position anymore, we don't want to advance the cursor. - if (!is_at_start()) { return peek_start(); } +// ============================================================================ +// JSON Pointer Compile-Time Support (RFC 6901) +// ============================================================================ - // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. - assert_at_start(); - return _json_iter->peek(); -} +// JSON Pointer parser: /field/0/nested (slash-separated) +template +struct json_pointer_parser { + static constexpr std::string_view pointer_str = Pointer.view(); -simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - // If we're not at the position anymore, we don't want to advance the cursor. - if (!is_at_start()) { return; } + // Unescape token: ~0 -> ~, ~1 -> / + static consteval void unescape_token(std::string_view src, char* dest, std::size_t& out_len) { + out_len = 0; + for (std::size_t i = 0; i < src.size(); ++i) { + if (src[i] == '~' && i + 1 < src.size()) { + if (src[i + 1] == '0') { + dest[out_len++] = '~'; + ++i; + } else if (src[i + 1] == '1') { + dest[out_len++] = '/'; + ++i; + } else { + dest[out_len++] = src[i]; + } + } else { + dest[out_len++] = src[i]; + } + } + } - // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. - assert_at_start(); - _json_iter->return_current_and_advance(); - _json_iter->ascend_to(depth()-1); -} + // Check if token is numeric + static consteval bool is_numeric(std::string_view token) { + if (token.empty()) return false; + if (token[0] == '0' && token.size() > 1) return false; + for (char c : token) { + if (c < '0' || c > '9') return false; + } + return true; + } -simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { - logger::log_start_value(*_json_iter, start_position(), depth(), type); - // If we're not at the position anymore, we don't want to advance the cursor. - const uint8_t *json; - if (!is_at_start()) { -#if SIMDJSON_DEVELOPMENT_CHECKS - if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } -#endif - json = peek_start(); - if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } - } else { - assert_at_start(); - /** - * We should be prudent. Let us peek. If it is not the right type, we - * return an error. Only once we have determined that we have the right - * type are we allowed to advance! - */ - json = _json_iter->peek(); - if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } - _json_iter->return_current_and_advance(); + // Parse numeric token to index + static consteval std::size_t parse_index(std::string_view token) { + std::size_t result = 0; + for (char c : token) { + result = result * 10 + (c - '0'); + } + return result; } + // Count tokens in pointer + static consteval std::size_t count_tokens() { + if (pointer_str.empty() || pointer_str == "/") return 0; - return SUCCESS; -} + std::size_t count = 0; + std::size_t pos = pointer_str[0] == '/' ? 1 : 0; + while (pos < pointer_str.size()) { + ++count; + std::size_t next_slash = pointer_str.find('/', pos); + if (next_slash == std::string_view::npos) break; + pos = next_slash + 1; + } -simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return peek_start(); } + return count; + } - assert_at_root(); - return _json_iter->peek(); -} -simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return peek_start(); } + // Get Nth token + static consteval std::string_view get_token(std::size_t token_index) { + std::size_t pos = pointer_str[0] == '/' ? 1 : 0; + std::size_t current_token = 0; - assert_at_non_root_start(); - return _json_iter->peek(); -} + while (current_token < token_index) { + std::size_t next_slash = pointer_str.find('/', pos); + pos = next_slash + 1; + ++current_token; + } -simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return; } + std::size_t token_end = pointer_str.find('/', pos); + if (token_end == std::string_view::npos) token_end = pointer_str.size(); - assert_at_root(); - _json_iter->return_current_and_advance(); - _json_iter->ascend_to(depth()-1); -} -simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return; } + return pointer_str.substr(pos, token_end - pos); + } +}; - assert_at_non_root_start(); - _json_iter->return_current_and_advance(); - _json_iter->ascend_to(depth()-1); -} +// JSON Pointer accessor +template +struct pointer_accessor { + using parser = json_pointer_parser; + static constexpr std::string_view pointer_view = Pointer.view(); + static constexpr std::size_t token_count = parser::count_tokens(); -simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { - logger::log_error(*_json_iter, start_position(), depth(), message); - return INCORRECT_TYPE; -} + // Validate pointer against struct definition + static consteval bool validate_pointer() { + if constexpr (!std::is_class_v) { + return true; + } -simdjson_inline bool value_iterator::is_at_start() const noexcept { - return position() == start_position(); -} + auto current_type = ^^T; + std::size_t pos = pointer_view[0] == '/' ? 1 : 0; -simdjson_inline bool value_iterator::is_at_key() const noexcept { - // Keys are at the same depth as the object. - // Note here that we could be safer and check that we are within an object, - // but we do not. - return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; -} + while (pos < pointer_view.size()) { + // Extract token up to next / + std::size_t token_end = pointer_view.find('/', pos); + if (token_end == std::string_view::npos) token_end = pointer_view.size(); -simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { - // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). - auto delta = position() - start_position(); - return delta == 1 || delta == 2; -} + std::string_view token = pointer_view.substr(pos, token_end - pos); -inline void value_iterator::assert_at_start() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); - SIMDJSON_ASSUME( _depth > 0 ); -} + if (parser::is_numeric(token)) { + if (!path_accessor::is_array_like_reflected(current_type)) { + return false; + } + current_type = path_accessor::get_element_type_reflected(current_type); + } else { + bool found = false; + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == token) { + current_type = std::meta::type_of(mem); + found = true; + break; + } + } -inline void value_iterator::assert_at_container_start() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); - SIMDJSON_ASSUME( _depth > 0 ); -} + if (!found) return false; + } -inline void value_iterator::assert_at_next() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); - SIMDJSON_ASSUME( _depth > 0 ); -} + pos = token_end + 1; + } -simdjson_inline void value_iterator::move_at_start() noexcept { - _json_iter->_depth = _depth; - _json_iter->token.set_position(_start_position); -} + return true; + } -simdjson_inline void value_iterator::move_at_container_start() noexcept { - _json_iter->_depth = _depth; - _json_iter->token.set_position(_start_position + 1); -} + // Recursive accessor + template + static inline simdjson_result access_impl(simdjson_result current) noexcept { + if constexpr (TokenIndex >= token_count) { + return current; + } else { + constexpr std::string_view token = parser::get_token(TokenIndex); -simdjson_inline simdjson_result value_iterator::reset_array() noexcept { - if(error()) { return error(); } - move_at_container_start(); - return started_array(); -} + if constexpr (parser::is_numeric(token)) { + constexpr std::size_t index = parser::parse_index(token); + auto arr = current.get_array().value_unsafe(); + auto next_value = arr.at(index); + return access_impl(next_value); + } else { + auto obj = current.get_object().value_unsafe(); + auto next_value = obj.find_field_unordered(token); + return access_impl(next_value); + } + } + } -simdjson_inline simdjson_result value_iterator::reset_object() noexcept { - if(error()) { return error(); } - move_at_container_start(); - return started_object(); -} + // Access JSON value at pointer + template + static inline simdjson_result access(DocOrValue& doc_or_val) noexcept { + if constexpr (std::is_class_v) { + constexpr bool pointer_valid = validate_pointer(); + static_assert(pointer_valid, "JSON Pointer does not match struct definition"); + } -inline void value_iterator::assert_at_child() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); - SIMDJSON_ASSUME( _depth > 0 ); -} + if (pointer_view.empty() || pointer_view == "/") { + if constexpr (requires { doc_or_val.get_value(); }) { + return doc_or_val.get_value(); + } else { + return doc_or_val; + } + } -inline void value_iterator::assert_at_root() const noexcept { - assert_at_start(); - SIMDJSON_ASSUME( _depth == 1 ); -} + simdjson_result current = doc_or_val.get_value(); + return access_impl<0>(current); + } -inline void value_iterator::assert_at_non_root_start() const noexcept { - assert_at_start(); - SIMDJSON_ASSUME( _depth > 1 ); -} + // Extract value at pointer directly into target with type validation + template + static inline error_code extract_field(DocOrValue& doc_or_val, FieldType& target) noexcept { + static_assert(std::is_class_v, "extract_field requires T to be a struct type for validation"); -inline void value_iterator::assert_is_valid() const noexcept { - SIMDJSON_ASSUME( _json_iter != nullptr ); -} + constexpr bool pointer_valid = validate_pointer(); + static_assert(pointer_valid, "JSON Pointer does not match struct definition"); -simdjson_inline bool value_iterator::is_valid() const noexcept { - return _json_iter != nullptr; -} + constexpr auto final_type = get_final_type(); + static_assert(final_type == ^^FieldType, "Target type does not match the field type at the pointer"); -simdjson_inline simdjson_result value_iterator::type() const noexcept { - switch (*peek_start()) { - case '{': - return json_type::object; - case '[': - return json_type::array; - case '"': - return json_type::string; - case 'n': - return json_type::null; - case 't': case 'f': - return json_type::boolean; - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return json_type::number; - default: - return TAPE_ERROR; + simdjson_result current_value = doc_or_val.get_value(); + auto json_value = access_impl<0>(current_value); + if (json_value.error()) return json_value.error(); + + return json_value.get(target); } -} -simdjson_inline token_position value_iterator::start_position() const noexcept { - return _start_position; -} +private: + // Get final type by walking pointer through struct + template + static consteval std::enable_if_t, std::meta::info> get_final_type() { + auto current_type = ^^T; + std::size_t pos = pointer_view[0] == '/' ? 1 : 0; -simdjson_inline token_position value_iterator::position() const noexcept { - return _json_iter->position(); -} + while (pos < pointer_view.size()) { + std::size_t token_end = pointer_view.find('/', pos); + if (token_end == std::string_view::npos) token_end = pointer_view.size(); -simdjson_inline token_position value_iterator::end_position() const noexcept { - return _json_iter->end_position(); -} + std::string_view token = pointer_view.substr(pos, token_end - pos); -simdjson_inline token_position value_iterator::last_position() const noexcept { - return _json_iter->last_position(); + if (parser::is_numeric(token)) { + current_type = path_accessor::get_element_type_reflected(current_type); + } else { + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == token) { + current_type = std::meta::type_of(mem); + break; + } + } + } + + pos = token_end + 1; + } + + return current_type; + } +}; + +// Compile-time JSON Pointer accessor with validation +template +inline simdjson_result<::simdjson::icelake::ondemand::value> at_pointer_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = pointer_accessor; + return accessor::access(doc_or_val); } -simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { - return _json_iter->report_error(error, message); +// Overload without type parameter (no validation) +template +inline simdjson_result<::simdjson::icelake::ondemand::value> at_pointer_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = pointer_accessor; + return accessor::access(doc_or_val); } +} // namespace json_path } // namespace ondemand } // namespace icelake } // namespace simdjson -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::value_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/value_iterator-inl.h for icelake */ +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION +#endif // SIMDJSON_GENERIC_ONDEMAND_COMPILE_TIME_ACCESSORS_H +/* end file simdjson/generic/ondemand/compile_time_accessors.h for icelake */ /* end file simdjson/generic/ondemand/amalgamated.h for icelake */ /* including simdjson/icelake/end.h: #include "simdjson/icelake/end.h" */ @@ -77591,7 +93665,7 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { @@ -77632,6 +93706,32 @@ backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + // We store it as a 64-bit bitmask even though we only need 16 bits. + uint64_t((is_backslash | is_quote | is_control).to_bitmask()) + }; +} + } // unnamed namespace } // namespace ppc64 } // namespace simdjson @@ -77700,7 +93800,7 @@ class value_iterator; /* end file simdjson/generic/ondemand/base.h for ppc64 */ /* including simdjson/generic/ondemand/deserialize.h for ppc64: #include "simdjson/generic/ondemand/deserialize.h" */ /* begin file simdjson/generic/ondemand/deserialize.h for ppc64 */ -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS #ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -77709,55 +93809,8 @@ class value_iterator; /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include namespace simdjson { -namespace tag_invoke_fn_ns { -void tag_invoke(); - -struct tag_invoke_fn { - template - requires requires(Tag tag, Args &&...args) { - tag_invoke(std::forward(tag), std::forward(args)...); - } - constexpr auto operator()(Tag tag, Args &&...args) const - noexcept(noexcept(tag_invoke(std::forward(tag), - std::forward(args)...))) - -> decltype(tag_invoke(std::forward(tag), - std::forward(args)...)) { - return tag_invoke(std::forward(tag), std::forward(args)...); - } -}; -} // namespace tag_invoke_fn_ns - -inline namespace tag_invoke_ns { -inline constexpr tag_invoke_fn_ns::tag_invoke_fn tag_invoke = {}; -} // namespace tag_invoke_ns - -template -concept tag_invocable = requires(Tag tag, Args... args) { - tag_invoke(std::forward(tag), std::forward(args)...); -}; - -template -concept nothrow_tag_invocable = - tag_invocable && requires(Tag tag, Args... args) { - { - tag_invoke(std::forward(tag), std::forward(args)...) - } noexcept; - }; - -template -using tag_invoke_result = - std::invoke_result; - -template -using tag_invoke_result_t = - std::invoke_result_t; - -template using tag_t = std::decay_t; - - struct deserialize_tag; /// These types are deserializable in a built-in way @@ -77779,7 +93832,7 @@ template concept custom_deserializable = tag_invocable; template -concept deserializable = custom_deserializable || is_builtin_deserializable_v; +concept deserializable = custom_deserializable || is_builtin_deserializable_v || concepts::optional_type; template concept nothrow_custom_deserializable = nothrow_tag_invocable; @@ -77790,28 +93843,44 @@ concept nothrow_deserializable = nothrow_custom_deserializable || is_bu /// Deserialize Tag inline constexpr struct deserialize_tag { + using array_type = ppc64::ondemand::array; + using object_type = ppc64::ondemand::object; using value_type = ppc64::ondemand::value; using document_type = ppc64::ondemand::document; using document_reference_type = ppc64::ondemand::document_reference; + // Customization Point for array + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(array_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for object + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(object_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + // Customization Point for value template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } // Customization Point for document template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } // Customization Point for document reference template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } @@ -77821,7 +93890,7 @@ inline constexpr struct deserialize_tag { } // namespace simdjson #endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS /* end file simdjson/generic/ondemand/deserialize.h for ppc64 */ /* including simdjson/generic/ondemand/value_iterator.h for ppc64: #include "simdjson/generic/ondemand/value_iterator.h" */ @@ -78163,7 +94232,7 @@ class value_iterator { simdjson_warn_unused simdjson_inline simdjson_result get_root_number(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result is_root_null(bool check_trailing) noexcept; - simdjson_inline error_code error() const noexcept; + simdjson_warn_unused simdjson_inline error_code error() const noexcept; simdjson_inline uint8_t *&string_buf_loc() noexcept; simdjson_inline const json_iterator &json_iter() const noexcept; simdjson_inline json_iterator &json_iter() noexcept; @@ -78247,8 +94316,8 @@ class value_iterator { simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; - simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; - simdjson_inline error_code end_container() noexcept; + simdjson_warn_unused simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container() noexcept; /** * Advance to a place expecting a value (increasing depth). @@ -78258,8 +94327,8 @@ class value_iterator { */ simdjson_inline simdjson_result advance_to_value() noexcept; - simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; - simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; + simdjson_warn_unused simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; + simdjson_warn_unused simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; simdjson_inline bool is_at_start() const noexcept; /** @@ -78296,7 +94365,7 @@ class value_iterator { /** @copydoc error_code json_iterator::end_position() const noexcept; */ simdjson_inline token_position end_position() const noexcept; /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ - simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + simdjson_warn_unused simdjson_inline error_code report_error(error_code error, const char *message) noexcept; friend class document; friend class object; @@ -78333,6 +94402,7 @@ struct simdjson_result : public ppc64::implemen /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include @@ -78361,13 +94431,14 @@ class value { * * You may use get_double(), get_bool(), get_uint64(), get_int64(), * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * When SIMDJSON_SUPPORTS_CONCEPTS is set, custom types are also supported. * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ template simdjson_inline simdjson_result get() -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -78384,22 +94455,38 @@ class value { * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * If the macro SIMDJSON_SUPPORTS_CONCEPTS is set, then custom types are also supported. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template - simdjson_inline error_code get(T &out) -#if SIMDJSON_SUPPORTS_DESERIALIZATION + simdjson_warn_unused simdjson_inline error_code get(T &out) +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { - #if SIMDJSON_SUPPORTS_DESERIALIZATION + #if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); + } else if constexpr (concepts::optional_type) { + using value_type = typename std::remove_cvref_t::value_type; + + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullopt + return SUCCESS; + } + + if (!out) { + out.emplace(); + } + return get(out.value()); } else { static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " "And you do not seem to have added support for it. Indeed, we have that " @@ -78409,7 +94496,7 @@ class value { static_cast(out); // to get rid of unused errors return UNINITIALIZED; } -#else // SIMDJSON_SUPPORTS_DESERIALIZATION +#else // SIMDJSON_SUPPORTS_CONCEPTS // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -78527,7 +94614,7 @@ class value { * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. */ template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; /** * Cast this JSON value to a "wobbly" string. @@ -78704,12 +94791,14 @@ class value { */ simdjson_inline simdjson_result at(size_t index) noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -78738,7 +94827,8 @@ class value { * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the default because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -78981,6 +95071,14 @@ class value { */ simdjson_inline simdjson_result at_path(std::string_view at_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * Supports wildcard character (*) for arrays or ".*" for objects. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern + */ + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; protected: /** @@ -79048,7 +95146,7 @@ struct simdjson_result : public ppc64::implementation_si simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -79077,12 +95175,14 @@ struct simdjson_result : public ppc64::implementation_si simdjson_inline simdjson_result end() & noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -79109,7 +95209,8 @@ struct simdjson_result : public ppc64::implementation_si * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the defaul because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -79129,7 +95230,22 @@ struct simdjson_result : public ppc64::implementation_si simdjson_result operator[](int) noexcept = delete; /** - * Get the type of this JSON value. + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * Given a valid JSON document, the answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just @@ -79153,6 +95269,7 @@ struct simdjson_result : public ppc64::implementation_si simdjson_inline simdjson_result current_depth() const noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; }; } // namespace simdjson @@ -79623,14 +95740,14 @@ class json_iterator { * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ - simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + simdjson_warn_unused simdjson_inline error_code report_error(error_code error, const char *message) noexcept; /** * Log error, but don't stop iteration. * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ - simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; + simdjson_warn_unused simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; /** * Take an input in json containing max_len characters and attempt to copy it over to tmpbuf, a buffer with @@ -79650,7 +95767,7 @@ class json_iterator { simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; - simdjson_inline error_code consume_character(char c) noexcept; + simdjson_warn_unused simdjson_inline error_code consume_character(char c) noexcept; #if SIMDJSON_DEVELOPMENT_CHECKS simdjson_inline token_position start_position(depth_t depth) const noexcept; simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; @@ -79741,6 +95858,7 @@ namespace ondemand { * The type of a JSON value. */ enum class json_type { + unknown=0, // Start at 1 to catch uninitialized / default values more easily array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) object, ///< A JSON object ( { "a": 1, "b" 2, ... } ) @@ -79947,6 +96065,12 @@ class raw_json_string { */ simdjson_inline const char * raw() const noexcept; + /** + * Get the character at index i. This is unchecked. + * [0] when the string is of length 0 returns the final quote ("). + */ + simdjson_inline char operator[](size_t i) const noexcept; + /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done) on target.size() characters, @@ -80086,10 +96210,10 @@ struct simdjson_result : public ppc64::impleme simdjson_inline ~simdjson_result() noexcept = default; ///< @private simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline char operator[](size_t) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape(ppc64::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(ppc64::ondemand::json_iterator &iter) const noexcept; }; - } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H @@ -80105,6 +96229,7 @@ struct simdjson_result : public ppc64::impleme /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include +#include namespace simdjson { namespace ppc64 { @@ -80223,7 +96348,9 @@ class parser { simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; - /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + /** @overload simdjson_result iterate(padded_string_view json) & noexcept + The string instance might be have its capacity extended. Note that this can still + result in AddressSanitizer: container-overflow in some cases. */ simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; @@ -80311,6 +96438,11 @@ class parser { * Setting batch_size to excessively large or excessively small values may impact negatively the * performance. * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what @@ -80318,10 +96450,10 @@ class parser { * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * - * ### Threads + * This is checked automatically with all iterate_many function calls, except for the two + * that take pointers (const char* or const uint8_t*). * - * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the - * hood to do some lookahead. + * ### Threads * * ### Parser Capacity * @@ -80347,14 +96479,16 @@ class parser { */ inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(padded_string_view json, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) + the string might be automatically padded with up to SIMDJSON_PADDING whitespace characters */ + inline simdjson_result iterate_many(std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe - /** @private We do not want to allow implicit conversion from C string to std::string. */ simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; @@ -80456,13 +96590,39 @@ class parser { bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; #endif + /** + * Get a unique parser instance corresponding to the current thread. + * This instance can be safely used within the current thread, but it should + * not be passed to other threads. + * + * A parser should only be used for one document at a time. + * + * Our simdjson::from functions use this parser instance. + * + * You can free the related parser by calling release_parser(). + */ + static simdjson_inline simdjson_warn_unused ondemand::parser& get_parser(); + /** + * Release the parser instance initialized by get_parser() and all the + * associated resources (memory). Returns true if a parser instance + * was released. + */ + static simdjson_inline bool release_parser(); + private: + friend bool release_parser(); + friend ondemand::parser& get_parser(); + /** Get the thread-local parser instance, allocates it if needed */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_parser_instance(); + /** Get the thread-local parser instance, it might be null */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_threadlocal_parser_if_exists(); /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; std::unique_ptr string_buf{}; + #if SIMDJSON_DEVELOPMENT_CHECKS std::unique_ptr start_positions{}; #endif @@ -80490,6 +96650,314 @@ struct simdjson_result : public ppc64::implementation_s #endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H /* end file simdjson/generic/ondemand/parser.h for ppc64 */ +// JSON builder - needed for extract_into functionality +/* including simdjson/generic/ondemand/json_string_builder.h for ppc64: #include "simdjson/generic/ondemand/json_string_builder.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder.h for ppc64 */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + + +#if SIMDJSON_SUPPORTS_CONCEPTS + +namespace ppc64 { +namespace builder { + class string_builder; +}} + +template +struct has_custom_serialization : std::false_type {}; + +inline constexpr struct serialize_tag { + template + constexpr void operator()(ppc64::builder::string_builder& b, T&& obj) const{ + return tag_invoke(*this, b, std::forward(obj)); + } + + +} serialize{}; +template +struct has_custom_serialization(), std::declval())) +>> : std::true_type {}; + +template +constexpr bool require_custom_serialization = has_custom_serialization::value; +#else +struct has_custom_serialization : std::false_type {}; +#endif // SIMDJSON_SUPPORTS_CONCEPTS + +namespace ppc64 { +namespace builder { +/** + * A builder for JSON strings representing documents. This is a low-level + * builder that is not meant to be used directly by end-users. Though it + * supports atomic types (Booleans, strings), it does not support composed + * types (arrays and objects). + * + * Ultimately, this class can support kernel-specific optimizations. E.g., + * it may make use of SIMD instructions to escape strings faster. + */ +class string_builder { +public: + simdjson_inline string_builder(size_t initial_capacity = DEFAULT_INITIAL_CAPACITY); + + static constexpr size_t DEFAULT_INITIAL_CAPACITY = 1024; + + /** + * Append number (includes Booleans). Booleans are mapped to the strings + * false and true. Numbers are converted to strings abiding by the JSON standard. + * Floating-point numbers are converted to the shortest string that 'correctly' + * represents the number. + */ + template::value>::type> + simdjson_inline void append(number_type v) noexcept; + + /** + * Append character c. + */ + simdjson_inline void append(char c) noexcept; + + /** + * Append the string 'null'. + */ + simdjson_inline void append_null() noexcept; + + /** + * Clear the content. + */ + simdjson_inline void clear() noexcept; + + /** + * Append the std::string_view, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append(std::string_view input) noexcept; + + /** + * Append the std::string_view surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(std::string_view input) noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + template + simdjson_inline void escape_and_append_with_quotes() noexcept; +#endif + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(char input) noexcept; + + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(const char* input) noexcept; + + /** + * Append the C string directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *c) noexcept; + + /** + * Append "{" to the buffer. + */ + simdjson_inline void start_object() noexcept; + + /** + * Append "}" to the buffer. + */ + simdjson_inline void end_object() noexcept; + + /** + * Append "[" to the buffer. + */ + simdjson_inline void start_array() noexcept; + + /** + * Append "]" to the buffer. + */ + simdjson_inline void end_array() noexcept; + + /** + * Append "," to the buffer. + */ + simdjson_inline void append_comma() noexcept; + + /** + * Append ":" to the buffer. + */ + simdjson_inline void append_colon() noexcept; + + /** + * Append a key-value pair to the buffer. + * The key is escaped and surrounded by double quotes. + * The value is escaped if it is a string. + */ + template + simdjson_inline void append_key_value(key_type key, value_type value) noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + template + simdjson_inline void append_key_value(value_type value) noexcept; + + // Support for optional types (std::optional, etc.) + template + requires(!require_custom_serialization) + simdjson_inline void append(const T &opt); + + template + requires(require_custom_serialization) + simdjson_inline void append(T &&val); + + // Support for string-like types + template + requires(std::is_convertible::value || + std::is_same::value ) + simdjson_inline void append(const T &value); +#endif +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS + // Support for range-based appending (std::ranges::view, etc.) + template +requires (!std::is_convertible::value && !require_custom_serialization) + simdjson_inline void append(const R &range) noexcept; +#endif + /** + * Append the std::string_view directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(std::string_view input) noexcept; + + /** + * Append len characters from str. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *str, size_t len) noexcept; +#if SIMDJSON_EXCEPTIONS + /** + * Creates an std::string from the written JSON buffer. + * Throws if memory allocation failed + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string() const noexcept(false); + + /** + * Creates an std::string_view from the written JSON buffer. + * Throws if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string_view() const noexcept(false) simdjson_lifetime_bound; +#endif + + /** + * Returns a view on the written JSON buffer. Returns an error + * if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result view() const noexcept; + + /** + * Appends the null character to the buffer and returns + * a pointer to the beginning of the written JSON buffer. + * Returns an error if memory allocation failed. + * The result is null-terminated. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result c_str() noexcept; + + /** + * Return true if the content is valid UTF-8. + */ + simdjson_inline bool validate_unicode() const noexcept; + + /** + * Returns the current size of the written JSON buffer. + * If an error occurred, returns 0. + */ + simdjson_inline size_t size() const noexcept; + +private: + /** + * Returns true if we can write at least upcoming_bytes bytes. + * The underlying buffer is reallocated if needed. It is designed + * to be called before writing to the buffer. It should be fast. + */ + simdjson_inline bool capacity_check(size_t upcoming_bytes); + + /** + * Grow the buffer to at least desired_capacity bytes. + * If the allocation fails, is_valid is set to false. We expect + * that this function would not be repeatedly called. + */ + simdjson_inline void grow_buffer(size_t desired_capacity); + + /** + * We use this helper function to make sure that is_valid is kept consistent. + */ + simdjson_inline void set_valid(bool valid) noexcept; + + std::unique_ptr buffer{}; + size_t position{0}; + size_t capacity{0}; + bool is_valid{true}; +}; + + + +} +} + + +#if !SIMDJSON_STATIC_REFLECTION +// fallback implementation until we have static reflection +template +simdjson_warn_unused simdjson_result to_json(const Z &z, size_t initial_capacity = simdjson::ppc64::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + simdjson::ppc64::builder::string_builder b(initial_capacity); + b.append(z); + std::string_view s; + auto e = b.view().get(s); + if(e) { return e; } + return std::string(s); +} +template +simdjson_warn_unused simdjson_error to_json(const Z &z, std::string &s, size_t initial_capacity = simdjson::ppc64::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + simdjson::ppc64::builder::string_builder b(initial_capacity); + b.append(z); + std::string_view sv; + auto e = b.view().get(sv); + if(e) { return e; } + s.assign(sv.data(), sv.size()); + return simdjson::SUCCESS; +} +#endif + +#if SIMDJSON_SUPPORTS_CONCEPTS +#endif // SIMDJSON_SUPPORTS_CONCEPTS + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_STRING_BUILDER_H +/* end file simdjson/generic/ondemand/json_string_builder.h for ppc64 */ + // All other declarations /* including simdjson/generic/ondemand/array.h for ppc64: #include "simdjson/generic/ondemand/array.h" */ /* begin file simdjson/generic/ondemand/array.h for ppc64 */ @@ -80500,6 +96968,7 @@ struct simdjson_result : public ppc64::implementation_s /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { @@ -80602,7 +97071,7 @@ class array { * JSONPath queries that trivially convertible to JSON Pointer queries: key * names and array indices. * - * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * https://www.rfc-editor.org/rfc/rfc9535 (RFC 9535) * * @return The value associated with the given JSONPath expression, or: * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails @@ -80612,6 +97081,15 @@ class array { */ inline simdjson_result at_path(std::string_view json_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * Supports wildcard patterns like "[*]" to match all array elements. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern + */ + inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; + /** * Consumes the array and returns a string_view instance corresponding to the * array as represented in JSON. It points inside the original document. @@ -80626,11 +97104,42 @@ class array { * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) noexcept; + +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this array as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON array is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_warn_unused simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the array, no matter where you are right now. */ - simdjson_inline error_code consume() noexcept; + simdjson_warn_unused simdjson_inline error_code consume() noexcept; /** * Begin array iteration. @@ -80703,8 +97212,30 @@ struct simdjson_result : public ppc64::implementation_si simdjson_inline simdjson_result at(size_t index) noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_warn_unused simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -80749,7 +97280,8 @@ class array_iterator { * * Part of the std::iterator interface. */ - simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result + operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. /** * Check if we are at the end of the JSON. * @@ -80773,6 +97305,11 @@ class array_iterator { */ simdjson_inline array_iterator &operator++() noexcept; + /** + * Check if the array is at the end. + */ + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; + private: value_iterator iter{}; @@ -80791,7 +97328,6 @@ namespace simdjson { template<> struct simdjson_result : public ppc64::implementation_simdjson_result_base { -public: simdjson_inline simdjson_result(ppc64::ondemand::array_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -80804,6 +97340,8 @@ struct simdjson_result : public ppc64::implemen simdjson_inline bool operator==(const simdjson_result &) const noexcept; simdjson_inline bool operator!=(const simdjson_result &) const noexcept; simdjson_inline simdjson_result &operator++() noexcept; + + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; }; } // namespace simdjson @@ -80820,6 +97358,7 @@ struct simdjson_result : public ppc64::implemen /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ @@ -80931,7 +97470,7 @@ class document { * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. */ template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; /** * Cast this JSON value to a string. * @@ -80997,7 +97536,7 @@ class document { */ template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -81020,7 +97559,7 @@ class document { */ template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -81038,18 +97577,18 @@ class document { * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. - * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns INCORRECT_TYPE If the JSON value is of the given type. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template - simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION + simdjson_warn_unused simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { @@ -81061,7 +97600,7 @@ class document { static_cast(out); // to get rid of unused errors return UNINITIALIZED; } -#else // SIMDJSON_SUPPORTS_DESERIALIZATION +#else // SIMDJSON_SUPPORTS_CONCEPTS // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -81071,7 +97610,7 @@ class document { " You may also add support for custom types, see our documentation."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS } /** @overload template error_code get(T &out) & noexcept */ @@ -81136,7 +97675,7 @@ class document { * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a raw_json_string. * @@ -81145,7 +97684,7 @@ class document { * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a bool. * @@ -81216,12 +97755,14 @@ class document { simdjson_inline simdjson_result end() & noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -81260,7 +97801,8 @@ class document { * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the default because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -81295,11 +97837,27 @@ class document { * E.g., you must still call "is_null()" to check that a value is null even if * "type()" returns json_type::null. * + * The answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. + * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). * - * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + * Prior to simdjson 4.0, this function would return an error given a bad token. + * Starting with simdjson 4.0, it will return simdjson::ondemand::json_type::unknown. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. */ simdjson_inline simdjson_result type() noexcept; @@ -81499,7 +98057,7 @@ class document { * JSONPath queries that trivially convertible to JSON Pointer queries: key * names and array indices. * - * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * https://www.rfc-editor.org/rfc/rfc9535 (RFC 9535) * * Key values are matched exactly, without unescaping or Unicode normalization. * We do a byte-by-byte comparison. E.g. @@ -81517,17 +98075,64 @@ class document { */ simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * + * Supports wildcard patterns like "$.array[*]" or "$.object.*" to match multiple elements. + * + * This method materializes all matching values into a vector. + * The document will be consumed after this call. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern, or: + * - INVALID_JSON_POINTER if the JSONPath cannot be parsed + * - NO_SUCH_FIELD if a field does not exist + * - INDEX_OUT_OF_BOUNDS if an array index is out of bounds + * - INCORRECT_TYPE if path traversal encounters wrong type + */ + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; + /** * Consumes the document and returns a string_view instance corresponding to the * document as represented in JSON. It points inside the original byte array containing * the JSON document. */ simdjson_inline simdjson_result raw_json() noexcept; + +#if SIMDJSON_STATIC_REFLECTION + /** + * Extract only specific fields from the JSON object into a struct. + * + * This allows selective deserialization of only the fields you need, + * potentially improving performance by skipping unwanted fields. + * + * Example: + * ```cpp + * struct Car { + * std::string make; + * std::string model; + * int year; + * double price; + * }; + * + * Car car; + * doc.extract_into<"make", "model">(car); + * // Only 'make' and 'model' fields are extracted from JSON + * ``` + * + * @tparam FieldNames Compile-time string literals specifying which fields to extract + * @param out The output struct to populate with selected fields + * @returns SUCCESS on success, or an error code if a required field is missing or has wrong type + */ + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION protected: /** * Consumes the document. */ - simdjson_inline error_code consume() noexcept; + simdjson_warn_unused simdjson_inline error_code consume() noexcept; simdjson_inline document(ondemand::json_iterator &&iter) noexcept; simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; @@ -81580,7 +98185,7 @@ class document_reference { simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -81589,7 +98194,7 @@ class document_reference { simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -81602,7 +98207,7 @@ class document_reference { } template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -81624,14 +98229,14 @@ class document_reference { * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template - simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION + simdjson_warn_unused simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { @@ -81643,7 +98248,7 @@ class document_reference { static_cast(out); // to get rid of unused errors return UNINITIALIZED; } -#else // SIMDJSON_SUPPORTS_DESERIALIZATION +#else // SIMDJSON_SUPPORTS_CONCEPTS // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -81653,12 +98258,17 @@ class document_reference { " You may also add support for custom types, see our documentation."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS } /** @overload template error_code get(T &out) & noexcept */ template simdjson_inline error_code get(T &out) && noexcept; simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_STATIC_REFLECTION + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION simdjson_inline operator document&() const noexcept; #if SIMDJSON_EXCEPTIONS template @@ -81699,6 +98309,7 @@ class document_reference { simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; private: document *doc{nullptr}; @@ -81727,7 +98338,7 @@ struct simdjson_result : public ppc64::implementation simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -81740,6 +98351,9 @@ struct simdjson_result : public ppc64::implementation template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS + + using ppc64::implementation_simdjson_result_base::operator*; + using ppc64::implementation_simdjson_result_base::operator->; template ::value == false>::type> explicit simdjson_inline operator T() noexcept(false); simdjson_inline operator ppc64::ondemand::array() & noexcept(false); @@ -81779,6 +98393,12 @@ struct simdjson_result : public ppc64::implementation simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; +#if SIMDJSON_STATIC_REFLECTION + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION }; @@ -81805,7 +98425,7 @@ struct simdjson_result : public ppc64::impl simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -81856,6 +98476,12 @@ struct simdjson_result : public ppc64::impl simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; +#if SIMDJSON_STATIC_REFLECTION + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION }; @@ -81948,7 +98574,7 @@ class document_stream { /** * Construct an uninitialized document_stream. * - * ```c++ + * ```cpp * document_stream docs; * auto error = parser.iterate_many(json).get(docs); * ``` @@ -81998,6 +98624,7 @@ class document_stream { * Default constructor. */ simdjson_inline iterator() noexcept; + simdjson_inline iterator(const iterator &other) noexcept = default; /** * Get the current document (or error). */ @@ -82011,6 +98638,7 @@ class document_stream { * @param other the end iterator to compare to. */ simdjson_inline bool operator!=(const iterator &other) const noexcept; + simdjson_inline bool operator==(const iterator &other) const noexcept; /** * @private * @@ -82054,6 +98682,11 @@ class document_stream { */ inline error_code error() const noexcept; + /** + * Returns whether the iterator is at the end. + */ + inline bool at_end() const noexcept; + private: simdjson_inline iterator(document_stream *s, bool finished) noexcept; /** The document_stream we're iterating through. */ @@ -82065,6 +98698,7 @@ class document_stream { friend class document_stream; friend class json_iterator; }; + using iterator = document_stream::iterator; /** * Start iterating the documents in the stream. @@ -82328,6 +98962,10 @@ struct simdjson_result : public ppc64::implementation_si /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION && SIMDJSON_SUPPORTS_CONCEPTS */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_string_builder.h" // for constevalutil::fixed_string */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { @@ -82349,12 +98987,14 @@ class object { simdjson_inline simdjson_result begin() noexcept; simdjson_inline simdjson_result end() noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -82397,7 +99037,8 @@ class object { * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the default because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -82480,6 +99121,15 @@ class object { */ inline simdjson_result at_path(std::string_view json_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * Supports wildcard patterns like ".*" to match all object fields. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern + */ + inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; + /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you @@ -82525,11 +99175,71 @@ class object { */ simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this object as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON object is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_warn_unused simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } + +#if SIMDJSON_STATIC_REFLECTION + /** + * Extract only specific fields from the JSON object into a struct. + * + * This allows selective deserialization of only the fields you need, + * potentially improving performance by skipping unwanted fields. + * + * Example: + * ```cpp + * struct Car { + * std::string make; + * std::string model; + * int year; + * double price; + * }; + * + * Car car; + * object.extract_into<"make", "model">(car); + * // Only 'make' and 'model' fields are extracted from JSON + * ``` + * + * @tparam FieldNames Compile-time string literals specifying which fields to extract + * @param out The output struct to populate with selected fields + * @returns SUCCESS on success, or an error code if a required field is missing or has wrong type + */ + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the object, no matter where you are right now. */ - simdjson_inline error_code consume() noexcept; + simdjson_warn_unused simdjson_inline error_code consume() noexcept; static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; @@ -82568,12 +99278,43 @@ struct simdjson_result : public ppc64::implementation_s simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; - + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; inline simdjson_result reset() noexcept; inline simdjson_result is_empty() noexcept; inline simdjson_result count_fields() & noexcept; inline simdjson_result raw_json() noexcept; + #if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_warn_unused simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#if SIMDJSON_STATIC_REFLECTION + // TODO: move this code into object-inl.h + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) noexcept { + if (error()) { return error(); } + return first.extract_into(out); + } +#endif // SIMDJSON_STATIC_REFLECTION +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -82702,6 +99443,20 @@ inline simdjson_result to_json_string(simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); + +#if SIMDJSON_STATIC_REFLECTION +/** + * Create a JSON string from any user-defined type using static reflection. + * Only available when SIMDJSON_STATIC_REFLECTION is enabled. + */ +template + requires(!std::same_as && + !std::same_as && + !std::same_as && + !std::same_as) +inline std::string to_json_string(const T& obj); +#endif + } // namespace simdjson /** @@ -82773,28 +99528,30 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result #include +#if SIMDJSON_STATIC_REFLECTION +#include +// #include // for std::define_static_string - header not available yet +#endif namespace simdjson { -template -constexpr bool require_custom_serialization = false; ////////////////////////////// // Number deserialization ////////////////////////////// template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { using limits = std::numeric_limits; @@ -82808,7 +99565,6 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { } template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { double x; SIMDJSON_TRY(val.get_double().get(x)); @@ -82817,7 +99573,6 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { } template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { using limits = std::numeric_limits; @@ -82830,8 +99585,23 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { return SUCCESS; } +////////////////////////////// +// String deserialization +////////////////////////////// + +// just a character! +error_code tag_invoke(deserialize_tag, auto &val, char &out) noexcept { + std::string_view x; + SIMDJSON_TRY(val.get_string().get(x)); + if(x.size() != 1) { + return INCORRECT_TYPE; + } + out = x[0]; + return SUCCESS; +} + +// any string-like type (can be constructed from std::string_view) template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { std::string_view str; SIMDJSON_TRY(val.get_string().get(str)); @@ -82848,7 +99618,6 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothr * doc.get>(). */ template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { using value_type = typename std::remove_cvref_t::value_type; static_assert( @@ -82857,9 +99626,13 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { static_assert( std::is_default_constructible_v, "The specified type inside the container must default constructible."); - ppc64::ondemand::array arr; - SIMDJSON_TRY(val.get_array().get(arr)); + if constexpr (std::is_same_v, ppc64::ondemand::array>) { + arr = val; + } else { + SIMDJSON_TRY(val.get_array().get(arr)); + } + for (auto v : arr) { if constexpr (concepts::returns_reference) { if (auto const err = v.get().get(concepts::emplace_one(out)); @@ -82890,7 +99663,6 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { * string-keyed types. */ template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { using value_type = typename std::remove_cvref_t::mapped_type; static_assert( @@ -82916,7 +99688,45 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { return SUCCESS; } +template +error_code tag_invoke(deserialize_tag, ppc64::ondemand::object &obj, T &out) noexcept { + using value_type = typename std::remove_cvref_t::mapped_type; + + out.clear(); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + + ppc64::ondemand::value value_obj; + SIMDJSON_TRY(field.value().get(value_obj)); + + value_type this_value; + SIMDJSON_TRY(value_obj.get(this_value)); + out.emplace(typename T::key_type(key), std::move(this_value)); + } + return SUCCESS; +} + +template +error_code tag_invoke(deserialize_tag, ppc64::ondemand::value &val, T &out) noexcept { + ppc64::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + +template +error_code tag_invoke(deserialize_tag, ppc64::ondemand::document &doc, T &out) noexcept { + ppc64::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} +template +error_code tag_invoke(deserialize_tag, ppc64::ondemand::document_reference &doc, T &out) noexcept { + ppc64::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} /** @@ -82934,7 +99744,6 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { * @return status of the conversion */ template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::element_type, ValT>) { using element_type = typename std::remove_cvref_t::element_type; @@ -82959,17 +99768,17 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser /** * This CPO (Customization Point Object) will help deserialize into optional types. */ -template - requires(!require_custom_serialization) -error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::value_type, ValT>) { +template +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept(nothrow_deserializable::value_type, decltype(val)>) { using value_type = typename std::remove_cvref_t::value_type; - static_assert( - deserializable, - "The specified type inside the unique_ptr must itself be deserializable"); - static_assert( - std::is_default_constructible_v, - "The specified type inside the unique_ptr must default constructible."); + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullopt + return SUCCESS; + } if (!out) { out.emplace(); @@ -82978,10 +99787,329 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser return SUCCESS; } + +#if SIMDJSON_STATIC_REFLECTION + + +template +constexpr bool user_defined_type = (std::is_class_v +&& !std::is_same_v && !std::is_same_v && !concepts::optional_type && +!concepts::appendable_containers); + + +template + requires(user_defined_type && std::is_class_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { + ppc64::ondemand::object obj; + if constexpr (std::is_same_v, ppc64::ondemand::object>) { + obj = val; + } else { + SIMDJSON_TRY(val.get_object().get(obj)); + } + template for (constexpr auto mem : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + if constexpr (concepts::optional_type) { + // for optional members, it's ok if the key is missing + auto error = obj[key].get(out.[:mem:]); + if (error && error != NO_SUCH_FIELD) { + if(error == NO_SUCH_FIELD) { + out.[:mem:].reset(); + continue; + } + return error; + } + } else { + // for non-optional members, the key must be present + SIMDJSON_TRY(obj[key].get(out.[:mem:])); + } + } + }; + return simdjson::SUCCESS; +} + +// Support for enum deserialization - deserialize from string representation using expand approach from P2996R12 +template + requires(std::is_enum_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { +#if SIMDJSON_STATIC_REFLECTION + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + if (str == std::meta::identifier_of(enum_val)) { + out = [:enum_val:]; + return SUCCESS; + } + }; + + return INCORRECT_TYPE; +#else + // Fallback: deserialize as integer if reflection not available + std::underlying_type_t int_val; + SIMDJSON_TRY(val.get(int_val)); + out = static_cast(int_val); + return SUCCESS; +#endif +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::unique_ptr &out) noexcept { + if (!out) { + out = std::make_unique(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::shared_ptr &out) noexcept { + if (!out) { + out = std::make_shared(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +#endif // SIMDJSON_STATIC_REFLECTION + +//////////////////////////////////////// +// Unique pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Shared pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Explicit optional specializations +//////////////////////////////////////// + +//////////////////////////////////////// +// Explicit smart pointer specializations for string and int types +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_shared(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + int64_t temp; + SIMDJSON_TRY(val.get_int64().get(temp)); + *out = static_cast(temp); + return SUCCESS; +} + } // namespace simdjson #endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS /* end file simdjson/generic/ondemand/std_deserialize.h for ppc64 */ // Inline definitions @@ -83074,7 +100202,7 @@ simdjson_inline simdjson_result array::begin() noexcept { simdjson_inline simdjson_result array::end() noexcept { return array_iterator(iter); } -simdjson_inline error_code array::consume() noexcept { +simdjson_warn_unused simdjson_warn_unused simdjson_inline error_code array::consume() noexcept { auto error = iter.json_iter().skip_child(iter.depth()-1); if(error) { iter.abandon(); } return error; @@ -83159,6 +100287,67 @@ inline simdjson_result array::at_path(std::string_view json_path) noexcep return at_pointer(json_pointer); } +inline simdjson_result> array::at_path_with_wildcard(std::string_view json_path) noexcept { + std::vector result; + + auto result_pair = get_next_key_and_json_path(json_path); + std::string_view key = result_pair.first; + std::string_view remaining_path = result_pair.second; + // Wildcard case + if(key=="*"){ + for(auto element: *this){ + + if(element.error()){ + return element.error(); + } + + if(remaining_path.empty()){ + // Use value_unsafe() because we've already checked for errors above. + // The 'element' is a simdjson_result wrapper, and we need to extract + // the underlying value. value_unsafe() is safe here because error() returned false. + result.push_back(std::move(element).value_unsafe()); + + }else{ + auto nested_result = element.at_path_with_wildcard(remaining_path); + + if(nested_result.error()){ + return nested_result.error(); + } + // Same logic as above. + std::vector nested_matches = std::move(nested_result).value_unsafe(); + + result.insert(result.end(), + std::make_move_iterator(nested_matches.begin()), + std::make_move_iterator(nested_matches.end())); + } + } + return result; + }else{ + // Specific index case in which we access the element at the given index + size_t idx=0; + + for(char c:key){ + if(c < '0' || c > '9'){ + return INVALID_JSON_POINTER; + } + idx = idx*10 + (c - '0'); + } + + auto element = at(idx); + + if(element.error()){ + return element.error(); + } + + if(remaining_path.empty()){ + result.push_back(std::move(element).value_unsafe()); + return result; + }else{ + return element.at_path_with_wildcard(remaining_path); + } + } +} + simdjson_inline simdjson_result array::at(size_t index) noexcept { size_t i = 0; for (auto value : *this) { @@ -83217,6 +100406,10 @@ simdjson_inline simdjson_result simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path_with_wildcard(json_path); +} simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { if (error()) { return error(); } return first.raw_json(); @@ -83265,6 +100458,9 @@ simdjson_inline array_iterator &array_iterator::operator++() noexcept { return *this; } +simdjson_inline bool array_iterator::at_end() const noexcept { + return iter.at_end(); +} } // namespace ondemand } // namespace ppc64 } // namespace simdjson @@ -83301,7 +100497,9 @@ simdjson_inline simdjson_result &simdjson_resul ++(first); return *this; } - +simdjson_inline bool simdjson_result::at_end() const noexcept { + return !first.iter.is_valid() || first.at_end(); +} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H @@ -83358,7 +100556,7 @@ simdjson_inline simdjson_result value::get_string(bool allow_r return iter.get_string(allow_replacement); } template -simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_warn_unused simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { return iter.get_string(receiver, allow_replacement); } simdjson_inline simdjson_result value::get_wobbly_string() noexcept { @@ -83400,15 +100598,15 @@ template<> simdjson_inline simdjson_result value::get() noexcept { retu template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } -template<> simdjson_inline error_code value::get(array& out) noexcept { return get_array().get(out); } -template<> simdjson_inline error_code value::get(object& out) noexcept { return get_object().get(out); } -template<> simdjson_inline error_code value::get(raw_json_string& out) noexcept { return get_raw_json_string().get(out); } -template<> simdjson_inline error_code value::get(std::string_view& out) noexcept { return get_string(false).get(out); } -template<> simdjson_inline error_code value::get(number& out) noexcept { return get_number().get(out); } -template<> simdjson_inline error_code value::get(double& out) noexcept { return get_double().get(out); } -template<> simdjson_inline error_code value::get(uint64_t& out) noexcept { return get_uint64().get(out); } -template<> simdjson_inline error_code value::get(int64_t& out) noexcept { return get_int64().get(out); } -template<> simdjson_inline error_code value::get(bool& out) noexcept { return get_bool().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(array& out) noexcept { return get_array().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(object& out) noexcept { return get_object().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(raw_json_string& out) noexcept { return get_raw_json_string().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(std::string_view& out) noexcept { return get_string(false).get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(number& out) noexcept { return get_number().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(double& out) noexcept { return get_double().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(uint64_t& out) noexcept { return get_uint64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(int64_t& out) noexcept { return get_int64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(bool& out) noexcept { return get_bool().get(out); } #if SIMDJSON_EXCEPTIONS template @@ -83606,6 +100804,19 @@ simdjson_inline simdjson_result value::at_path(std::string_view json_path } } +inline simdjson_result> value::at_path_with_wildcard(std::string_view json_path) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path_with_wildcard(json_path); + case json_type::object: + return (*this).get_object().at_path_with_wildcard(json_path); + default: + return INVALID_JSON_POINTER; + } +} + } // namespace ondemand } // namespace ppc64 } // namespace simdjson @@ -83856,6 +101067,14 @@ simdjson_inline simdjson_result simdjson_result> simdjson_result::at_path_with_wildcard( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path_with_wildcard(json_path); +} + } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H @@ -84005,7 +101224,7 @@ simdjson_inline simdjson_result document::get_string(bool allo return get_root_value_iterator().get_root_string(true, allow_replacement); } template -simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_warn_unused simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept { return get_root_value_iterator().get_root_string(receiver, true, allow_replacement); } simdjson_inline simdjson_result document::get_wobbly_string() noexcept { @@ -84031,15 +101250,15 @@ template<> simdjson_inline simdjson_result document::get() & noexcept { template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } -template<> simdjson_inline error_code document::get(array& out) & noexcept { return get_array().get(out); } -template<> simdjson_inline error_code document::get(object& out) & noexcept { return get_object().get(out); } -template<> simdjson_inline error_code document::get(raw_json_string& out) & noexcept { return get_raw_json_string().get(out); } -template<> simdjson_inline error_code document::get(std::string_view& out) & noexcept { return get_string(false).get(out); } -template<> simdjson_inline error_code document::get(double& out) & noexcept { return get_double().get(out); } -template<> simdjson_inline error_code document::get(uint64_t& out) & noexcept { return get_uint64().get(out); } -template<> simdjson_inline error_code document::get(int64_t& out) & noexcept { return get_int64().get(out); } -template<> simdjson_inline error_code document::get(bool& out) & noexcept { return get_bool().get(out); } -template<> simdjson_inline error_code document::get(value& out) & noexcept { return get_value().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(array& out) & noexcept { return get_array().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(object& out) & noexcept { return get_object().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(raw_json_string& out) & noexcept { return get_raw_json_string().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(std::string_view& out) & noexcept { return get_string(false).get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(double& out) & noexcept { return get_double().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(uint64_t& out) & noexcept { return get_uint64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(int64_t& out) & noexcept { return get_int64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(bool& out) & noexcept { return get_bool().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(value& out) & noexcept { return get_value().get(out); } template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } @@ -84059,8 +101278,8 @@ simdjson_inline document::operator object() & noexcept(false) { return get_objec simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline document::operator double() noexcept(false) { return get_double(); } -simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } -simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator std::string_view() noexcept(false) simdjson_lifetime_bound { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) simdjson_lifetime_bound { return get_raw_json_string(); } simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } simdjson_inline document::operator value() noexcept(false) { return get_value(); } @@ -84102,1141 +101321,1961 @@ simdjson_inline simdjson_result document::find_field_unordered(std::strin simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { return start_or_resume_object().find_field_unordered(key); } -simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { - return start_or_resume_object()[key]; +simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { + return start_or_resume_object()[key]; +} + +simdjson_warn_unused simdjson_inline error_code document::consume() noexcept { + bool scalar = false; + auto error = is_scalar().get(scalar); + if(error) { return error; } + if(scalar) { + iter.return_current_and_advance(); + return SUCCESS; + } + error = iter.skip_child(0); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result document::raw_json() noexcept { + auto _iter = get_root_value_iterator(); + const uint8_t * starting_point{_iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter.unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result document::type() noexcept { + return get_root_value_iterator().type(); +} + +simdjson_inline simdjson_result document::is_scalar() noexcept { + // For more speed, we could do: + // return iter.is_single_token(); + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline simdjson_result document::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} + +simdjson_inline bool document::is_negative() noexcept { + return get_root_value_iterator().is_root_negative(); +} + +simdjson_inline simdjson_result document::is_integer() noexcept { + return get_root_value_iterator().is_root_integer(true); +} + +simdjson_inline simdjson_result document::get_number_type() noexcept { + return get_root_value_iterator().get_root_number_type(true); +} + +simdjson_inline simdjson_result document::get_number() noexcept { + return get_root_value_iterator().get_root_number(true); +} + + +simdjson_inline simdjson_result document::raw_json_token() noexcept { + auto _iter = get_root_value_iterator(); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_root_length()); +} + +simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_pointer.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result document::at_path(std::string_view json_path) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_path.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result> document::at_path_with_wildcard(std::string_view json_path) noexcept { + rewind(); // Rewind the document each time at_path_with_wildcard is called + if (json_path.empty()) { + return INVALID_JSON_POINTER; + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path_with_wildcard(json_path); + case json_type::object: + return (*this).get_object().at_path_with_wildcard(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code document::extract_into(T& out) & noexcept { + // Helper to check if a field name matches any of the requested fields + auto should_extract = [](std::string_view field_name) constexpr -> bool { + return ((FieldNames.view() == field_name) || ...); + }; + + // Iterate through all members of T using reflection + template for (constexpr auto mem : std::define_static_array( + std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + + // Only extract this field if it's in our list of requested fields + if constexpr (should_extract(key)) { + // Try to find and extract the field + if constexpr (concepts::optional_type) { + // For optional fields, it's ok if they're missing + auto field_result = find_field_unordered(key); + if (!field_result.error()) { + auto error = field_result.get(out.[:mem:]); + if (error && error != NO_SUCH_FIELD) { + return error; + } + } else if (field_result.error() != NO_SUCH_FIELD) { + return field_result.error(); + } else { + out.[:mem:].reset(); + } + } else { + // For required fields (in the requested list), fail if missing + SIMDJSON_TRY((*this)[key].get(out.[:mem:])); + } + } + } + }; + + return SUCCESS; +} + +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + ppc64::ondemand::document &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base( + error + ) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first); +} +template<> simdjson_warn_unused simdjson_inline error_code simdjson_result::get(ppc64::ondemand::document &out) & noexcept = delete; +template<> simdjson_warn_unused simdjson_inline error_code simdjson_result::get(ppc64::ondemand::document &out) && noexcept { + if (error()) { return error(); } + out = std::forward(first); + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} + +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} + +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} + +simdjson_inline bool simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} + +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} + +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} + +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} + + +#if SIMDJSON_EXCEPTIONS +template ::value == false>::type> +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator ppc64::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator ppc64::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator ppc64::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } -simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { - return start_or_resume_object()[key]; +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } - -simdjson_inline error_code document::consume() noexcept { - bool scalar = false; - auto error = is_scalar().get(scalar); - if(error) { return error; } - if(scalar) { - iter.return_current_and_advance(); - return SUCCESS; - } - error = iter.skip_child(0); - if(error) { iter.abandon(); } - return error; +simdjson_inline simdjson_result::operator ppc64::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } +#endif -simdjson_inline simdjson_result document::raw_json() noexcept { - auto _iter = get_root_value_iterator(); - const uint8_t * starting_point{_iter.peek_start()}; - auto error = consume(); - if(error) { return error; } - // After 'consume()', we could be left pointing just beyond the document, but that - // is ok because we are not going to dereference the final pointer position, we just - // use it to compute the length in bytes. - const uint8_t * final_point{iter.unsafe_pointer()}; - return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); } -simdjson_inline simdjson_result document::type() noexcept { - return get_root_value_iterator().type(); +simdjson_inline bool simdjson_result::at_end() const noexcept { + if (error()) { return error(); } + return first.at_end(); } -simdjson_inline simdjson_result document::is_scalar() noexcept { - // For more speed, we could do: - // return iter.is_single_token(); - json_type this_type; - auto error = type().get(this_type); - if(error) { return error; } - return ! ((this_type == json_type::array) || (this_type == json_type::object)); + +simdjson_inline int32_t simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); } -simdjson_inline simdjson_result document::is_string() noexcept { - json_type this_type; - auto error = type().get(this_type); - if(error) { return error; } - return (this_type == json_type::string); +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); } -simdjson_inline bool document::is_negative() noexcept { - return get_root_value_iterator().is_root_negative(); +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); } -simdjson_inline simdjson_result document::is_integer() noexcept { - return get_root_value_iterator().is_root_integer(true); +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); } -simdjson_inline simdjson_result document::get_number_type() noexcept { - return get_root_value_iterator().get_root_number_type(true); +simdjson_inline simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path_with_wildcard(json_path); } -simdjson_inline simdjson_result document::get_number() noexcept { - return get_root_value_iterator().get_root_number(true); +#if SIMDJSON_STATIC_REFLECTION +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code simdjson_result::extract_into(T& out) & noexcept { + if (error()) { return error(); } + return first.extract_into(out); } +#endif // SIMDJSON_STATIC_REFLECTION +} // namespace simdjson -simdjson_inline simdjson_result document::raw_json_token() noexcept { - auto _iter = get_root_value_iterator(); - return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_root_length()); -} -simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { - rewind(); // Rewind the document each time at_pointer is called - if (json_pointer.empty()) { - return this->get_value(); - } - json_type t; - SIMDJSON_TRY(type().get(t)); - switch (t) - { - case json_type::array: - return (*this).get_array().at_pointer(json_pointer); - case json_type::object: - return (*this).get_object().at_pointer(json_pointer); - default: - return INVALID_JSON_POINTER; - } -} +namespace simdjson { +namespace ppc64 { +namespace ondemand { -simdjson_inline simdjson_result document::at_path(std::string_view json_path) noexcept { - rewind(); // Rewind the document each time at_pointer is called - if (json_path.empty()) { - return this->get_value(); - } - json_type t; - SIMDJSON_TRY(type().get(t)); - switch (t) { - case json_type::array: - return (*this).get_array().at_path(json_path); - case json_type::object: - return (*this).get_object().at_path(json_path); - default: - return INVALID_JSON_POINTER; - } +simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} +simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} +simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } +simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } +simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } +/** + * The document_reference instances are used primarily/solely for streams of JSON + * documents. + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. + * + * However, for streams of JSON documents, we want to be able to start from + * "321" "321" "321" + * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() + * successfully each time. + * + * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: + * this indicates that we allow trailing content. + */ +simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } +simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } +simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } +template +simdjson_warn_unused simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } +simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } +simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } +simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } +simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } +simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_value(); } +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline document_reference::operator T() noexcept(false) { return get(); } +simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } +simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } +simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } +simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } +simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } +#endif +simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } +simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } +simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } +simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } +simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } +simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } +simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_inline simdjson_result document_reference::is_string() noexcept { return doc->is_string(); } +simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } +simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } +simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } +simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } +simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } +simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } +simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } +simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_inline simdjson_result document_reference::at_path(std::string_view json_path) noexcept { return doc->at_path(json_path); } +simdjson_inline simdjson_result> document_reference::at_path_with_wildcard(std::string_view json_path) noexcept { return doc->at_path_with_wildcard(json_path); } +simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} +simdjson_inline document_reference::operator document&() const noexcept { return *doc; } +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code document_reference::extract_into(T& out) & noexcept { + return doc->extract_into(out); } - +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION } // namespace ondemand } // namespace ppc64 } // namespace simdjson + + namespace simdjson { +simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::document_reference value, error_code error) + noexcept : implementation_simdjson_result_base(std::forward(value), error) {} -simdjson_inline simdjson_result::simdjson_result( - ppc64::ondemand::document &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ -} -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base( - error - ) -{ -} -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } -simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { if (error()) { return error(); } return first.at(index); } -simdjson_inline error_code simdjson_result::rewind() noexcept { +simdjson_inline error_code simdjson_result::rewind() noexcept { if (error()) { return error(); } first.rewind(); return SUCCESS; } -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_inline simdjson_result simdjson_result::end() & noexcept { +simdjson_inline simdjson_result simdjson_result::end() & noexcept { return {}; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { if (error()) { return error(); } return first.get_array(); } -simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { if (error()) { return error(); } return first.get_object(); } -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_warn_unused simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } -simdjson_inline simdjson_result simdjson_result::get_value() noexcept { +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { if (error()) { return error(); } return first.get_value(); } -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +template <> +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(ppc64::ondemand::document_reference &out) & noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} +template <> +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(ppc64::ondemand::document_reference &out) && noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline simdjson_result::operator T() noexcept(false) { + static_assert(std::is_same::value == false, "You should not call get when T is a document"); + static_assert(std::is_same::value == false, "You should not call get when T is a document"); + if (error()) { throw simdjson_error(error()); } + return first.get(); +} +simdjson_inline simdjson_result::operator ppc64::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator ppc64::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator ppc64::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator ppc64::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} +simdjson_inline simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path_with_wildcard(json_path); +} +#if SIMDJSON_STATIC_REFLECTION +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code simdjson_result::extract_into(T& out) & noexcept { + if (error()) { return error(); } + return first.extract_into(out); +} +#endif // SIMDJSON_STATIC_REFLECTION +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H +/* end file simdjson/generic/ondemand/document-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/document_stream-inl.h for ppc64: #include "simdjson/generic/ondemand/document_stream-inl.h" */ +/* begin file simdjson/generic/ondemand/document_stream-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. + std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); +} + +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); +} + +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). + if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); +} + + +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. + can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } +} + +inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); +} + +#endif // SIMDJSON_THREADS_ENABLED + +simdjson_inline document_stream::document_stream( + ondemand::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size, + bool _allow_comma_separated +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? MINIMAL_BATCH_SIZE : _batch_size}, + allow_comma_separated{_allow_comma_separated}, + error{SUCCESS} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change + #endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif +} -template -simdjson_inline simdjson_result simdjson_result::get() & noexcept { - if (error()) { return error(); } - return first.get(); -} -template -simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first).get(); -} -template -simdjson_inline error_code simdjson_result::get(T &out) & noexcept { - if (error()) { return error(); } - return first.get(out); -} -template -simdjson_inline error_code simdjson_result::get(T &out) && noexcept { - if (error()) { return error(); } - return std::forward(first).get(out); +simdjson_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + allow_comma_separated{false}, + error{UNINITIALIZED} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) + #endif +{ } -template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; -template<> simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first); -} -template<> simdjson_inline error_code simdjson_result::get(ppc64::ondemand::document &out) & noexcept = delete; -template<> simdjson_inline error_code simdjson_result::get(ppc64::ondemand::document &out) && noexcept { - if (error()) { return error(); } - out = std::forward(first); - return SUCCESS; +simdjson_inline document_stream::~document_stream() noexcept +{ + #ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); + #endif } -simdjson_inline simdjson_result simdjson_result::type() noexcept { - if (error()) { return error(); } - return first.type(); +inline size_t document_stream::size_in_bytes() const noexcept { + return len; } -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { - if (error()) { return error(); } - return first.is_scalar(); +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; } -simdjson_inline simdjson_result simdjson_result::is_string() noexcept { - if (error()) { return error(); } - return first.is_string(); +simdjson_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { } -simdjson_inline bool simdjson_result::is_negative() noexcept { - if (error()) { return error(); } - return first.is_negative(); +simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { } -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { - if (error()) { return error(); } - return first.is_integer(); +simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { + return simdjson_result(stream->doc, stream->error); } -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { - if (error()) { return error(); } - return first.get_number_type(); +simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) + // + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. + // + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->error() is guarded against error conditions + // (it will immediately return if stream->error casts to false). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. + return *this; } -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { - if (error()) { return error(); } - return first.get_number(); +simdjson_inline bool document_stream::iterator::at_end() const noexcept { + return finished; } -#if SIMDJSON_EXCEPTIONS -template ::value == false>::type> -simdjson_inline simdjson_result::operator T() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator ppc64::ondemand::array() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator ppc64::ondemand::object() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator double() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; } -simdjson_inline simdjson_result::operator ppc64::ondemand::raw_json_string() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline bool document_stream::iterator::operator==(const document_stream::iterator &other) const noexcept { + return finished == other.finished; } -simdjson_inline simdjson_result::operator bool() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished. + return iterator(this, error == EMPTY); } -simdjson_inline simdjson_result::operator ppc64::ondemand::value() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); } -#endif +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->allocate(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } + doc_index = batch_start; + doc = document(json_iterator(&buf[batch_start], parser)); + doc.iter._streaming = true; -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { - if (error()) { return error(); } - return first.current_location(); + #ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread on next batch if needed + error = stage1_thread_parser.allocate(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } + } + #endif // SIMDJSON_THREADS_ENABLED } -simdjson_inline bool simdjson_result::at_end() const noexcept { - if (error()) { return error(); } - return first.at_end(); -} +inline void document_stream::next() noexcept { + // We always enter at once once in an error condition. + if (error) { return; } + next_document(); + if (error) { return; } + auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); + doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; + // Check if at end of structural indexes (i.e. at end of batch) + if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { + error = EMPTY; + // Load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + #ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } + #else + error = run_stage1(*parser, batch_start); + #endif + /** + * Whenever we move to another window, we need to update all pointers to make + * it appear as if the input buffer started at the beginning of the window. + * + * Take this input: + * + * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] + * + * Say you process the following window... + * + * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' + * + * When you do so, the json_iterator has a pointer at the beginning of the memory region + * (pointing at the beginning of '{"z"...'. + * + * When you move to the window that starts at... + * + * '[7, 10, 9] [15, 11, 12, 13] ... + * + * then it is not sufficient to just run stage 1. You also need to re-anchor the + * json_iterator so that it believes we are starting at '[7, 10, 9]...'. + * + * Under the DOM front-end, this gets done automatically because the parser owns + * the pointer the data, and when you call stage1 and then stage2 on the same + * parser, then stage2 will run on the pointer acquired by stage1. + * + * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that + * we used. But json_iterator has no callback when stage1 is called on the parser. + * In fact, I think that the parser is unaware of json_iterator. + * + * + * So we need to re-anchor the json_iterator after each call to stage 1 so that + * all of the pointers are in sync. + */ + doc.iter = json_iterator(&buf[batch_start], parser); + doc.iter._streaming = true; + /** + * End of resync. + */ -simdjson_inline int32_t simdjson_result::current_depth() const noexcept { - if (error()) { return error(); } - return first.current_depth(); + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. + doc_index = batch_start; + } + } } -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { - if (error()) { return error(); } - return first.raw_json_token(); +inline void document_stream::next_document() noexcept { + // Go to next place where depth=0 (document depth) + error = doc.iter.skip_child(0); + if (error) { return; } + // Always set depth=1 at the start of document + doc.iter._depth = 1; + // consume comma if comma separated is allowed + if (allow_comma_separated) { + error_code ignored = doc.iter.consume_character(','); + static_cast(ignored); // ignored on purpose + } + // Resets the string buffer at the beginning, thus invalidating the strings. + doc.iter._string_buf_loc = parser->string_buf.get(); + doc.iter._root = doc.iter.position(); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; } -simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { - if (error()) { return error(); } - return first.at_path(json_path); +inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { + // This code only updates the structural index in the parser, it does not update any json_iterator + // instance. + size_t remaining = len - _batch_start; + if (remaining <= batch_size) { + return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); + } else { + return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); + } } -} // namespace simdjson - +simdjson_inline size_t document_stream::iterator::current_index() const noexcept { + return stream->doc_index; +} -namespace simdjson { -namespace ppc64 { -namespace ondemand { +simdjson_inline std::string_view document_stream::iterator::source() const noexcept { + auto depth = stream->doc.iter.depth(); + auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); -simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} -simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} -simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } -simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } -simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } -/** - * The document_reference instances are used primarily/solely for streams of JSON - * documents. - * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should - * give an error, so we check for trailing content. - * - * However, for streams of JSON documents, we want to be able to start from - * "321" "321" "321" - * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() - * successfully each time. - * - * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: - * this indicates that we allow trailing content. - */ -simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } -simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } -simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } -simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } -simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } -simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } -simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } -template -simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } -simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } -simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } -simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } -simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } -simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_array(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_object(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_raw_json_string(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_string(false); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_double(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_uint64(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_int64(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_bool(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_value(); } -#if SIMDJSON_EXCEPTIONS -template -simdjson_inline document_reference::operator T() noexcept(false) { return get(); } -simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } -simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } -simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } -simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } -simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } -simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } -simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } -simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } -simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } -#endif -simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } -simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } -simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } -simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } -simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } -simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } -simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } -simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } -simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } -simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } -simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } -simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } -simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } -simdjson_inline simdjson_result document_reference::is_string() noexcept { return doc->is_string(); } -simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } -simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } -simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } -simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } -simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } -simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } -simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } -simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } -simdjson_inline simdjson_result document_reference::at_path(std::string_view json_path) noexcept { return doc->at_path(json_path); } -simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} -simdjson_inline document_reference::operator document&() const noexcept { return *doc; } + // If at root, process the first token to determine if scalar value + if (stream->doc.iter.at_root()) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': // Depth=1 already at start of document + break; + case '}': case ']': + depth--; + break; + default: // Scalar value document + // TODO: We could remove trailing whitespaces + // This returns a string spanning from start of value to the beginning of the next document (excluded) + { + auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; + // normally the length would be next_index - current_index() - 1, except for the last document + size_t svlen = next_index - current_index(); + const char *start = reinterpret_cast(stream->buf) + current_index(); + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { + svlen--; + } + return std::string_view(start, svlen); + } + } + cur_struct_index++; + } -} // namespace ondemand -} // namespace ppc64 -} // namespace simdjson + while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': + depth++; + break; + case '}': case ']': + depth--; + break; + } + if (depth == 0) { break; } + cur_struct_index++; + } + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; +} +inline error_code document_stream::iterator::error() const noexcept { + return stream->error; +} -namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::document_reference value, error_code error) - noexcept : implementation_simdjson_result_base(std::forward(value), error) {} +#ifdef SIMDJSON_THREADS_ENABLED +inline void document_stream::load_from_stage1_thread() noexcept { + worker->finish(); + // Swap to the parser that was loaded up in the thread. Make sure the parser has + // enough memory to swap to, as well. + std::swap(stage1_thread_parser,*parser); + error = stage1_thread_error; + if (error) { return; } -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { - if (error()) { return error(); } - return first.count_elements(); -} -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { - if (error()) { return error(); } - return first.count_fields(); -} -simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { - if (error()) { return error(); } - return first.at(index); -} -simdjson_inline error_code simdjson_result::rewind() noexcept { - if (error()) { return error(); } - first.rewind(); - return SUCCESS; -} -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { - if (error()) { return error(); } - return first.begin(); -} -simdjson_inline simdjson_result simdjson_result::end() & noexcept { - return {}; -} -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); -} -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); -} -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { - if (error()) { return error(); } - return first[key]; -} -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { - if (error()) { return error(); } - return first[key]; -} -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); -} -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); -} -simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { - if (error()) { return error(); } - return first.get_array(); + // If there's anything left, start the stage 1 thread! + if (next_batch_start() < len) { + start_stage1_thread(); + } } -simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { - if (error()) { return error(); } - return first.get_object(); + +inline void document_stream::start_stage1_thread() noexcept { + // we call the thread on a lambda that will update + // this->stage1_thread_error + // there is only one thread that may write to this value + // TODO this is NOT exception-safe. + this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); + + worker->run(this, & this->stage1_thread_parser, _next_batch_start); } -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { - if (error()) { return error(); } - return first.get_uint64(); + +#endif // SIMDJSON_THREADS_ENABLED + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ } -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { - if (error()) { return error(); } - return first.get_uint64_in_string(); +simdjson_inline simdjson_result::simdjson_result( + ppc64::ondemand::document_stream &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ } -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { - if (error()) { return error(); } - return first.get_int64(); + } -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { - if (error()) { return error(); } - return first.get_int64_in_string(); + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H +/* end file simdjson/generic/ondemand/document_stream-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/field-inl.h for ppc64: #include "simdjson/generic/ondemand/field-inl.h" */ +/* begin file simdjson/generic/ondemand/field-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +// clang 6 does not think the default constructor can be noexcept, so we make it explicit +simdjson_inline field::field() noexcept : std::pair() {} + +simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept + : std::pair(key, std::forward(value)) +{ } -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { - if (error()) { return error(); } - return first.get_double(); + +simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { + raw_json_string key; + SIMDJSON_TRY( parent_iter.field_key().get(key) ); + SIMDJSON_TRY( parent_iter.field_value() ); + return field::start(parent_iter, key); } -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { - if (error()) { return error(); } - return first.get_double_in_string(); + +simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { + return field(key, parent_iter.child()); } -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.get_string(allow_replacement); + +simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); + first.consume(); + return answer; } + template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.get_string(receiver, allow_replacement); -} -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { - if (error()) { return error(); } - return first.get_wobbly_string(); +simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { + std::string_view key; + SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); + receiver = key; + return SUCCESS; } -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { - if (error()) { return error(); } - return first.get_raw_json_string(); + +simdjson_inline raw_json_string field::key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return first; } -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { - if (error()) { return error(); } - return first.get_bool(); + + +simdjson_inline std::string_view field::key_raw_json_token() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return std::string_view(reinterpret_cast(first.buf-1), second.iter._json_iter->token.peek(-1) - first.buf + 1); } -simdjson_inline simdjson_result simdjson_result::get_value() noexcept { - if (error()) { return error(); } - return first.get_value(); + +simdjson_inline std::string_view field::escaped_key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + auto end_quote = second.iter._json_iter->token.peek(-1); + while(*end_quote != '"') end_quote--; + return std::string_view(reinterpret_cast(first.buf), end_quote - first.buf); } -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { - if (error()) { return error(); } - return first.is_null(); + +simdjson_inline value &field::value() & noexcept { + return second; } -template -simdjson_inline simdjson_result simdjson_result::get() & noexcept { - if (error()) { return error(); } - return first.get(); + +simdjson_inline value field::value() && noexcept { + return std::forward(*this).second; } -template -simdjson_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first).get(); + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + ppc64::ondemand::field &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ } -template -simdjson_inline error_code simdjson_result::get(T &out) & noexcept { - if (error()) { return error(); } - return first.get(out); +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ } -template -simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + +simdjson_inline simdjson_result simdjson_result::key() noexcept { if (error()) { return error(); } - return std::forward(first).get(out); + return first.key(); } -simdjson_inline simdjson_result simdjson_result::type() noexcept { + +simdjson_inline simdjson_result simdjson_result::key_raw_json_token() noexcept { if (error()) { return error(); } - return first.type(); + return first.key_raw_json_token(); } -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + +simdjson_inline simdjson_result simdjson_result::escaped_key() noexcept { if (error()) { return error(); } - return first.is_scalar(); + return first.escaped_key(); } -simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + +simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { if (error()) { return error(); } - return first.is_string(); + return first.unescaped_key(allow_replacement); } -template <> -simdjson_inline error_code simdjson_result::get(ppc64::ondemand::document_reference &out) & noexcept { + +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { if (error()) { return error(); } - out = first; - return SUCCESS; + return first.unescaped_key(receiver, allow_replacement); } -template <> -simdjson_inline error_code simdjson_result::get(ppc64::ondemand::document_reference &out) && noexcept { + +simdjson_inline simdjson_result simdjson_result::value() noexcept { if (error()) { return error(); } - out = first; - return SUCCESS; + return std::move(first.value()); } -simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { - if (error()) { return error(); } - return first.is_negative(); + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +/* end file simdjson/generic/ondemand/field-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/json_iterator-inl.h for ppc64: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/json_iterator-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept + : token(std::forward(other.token)), + parser{other.parser}, + _string_buf_loc{other._string_buf_loc}, + error{other.error}, + _depth{other._depth}, + _root{other._root}, + _streaming{other._streaming} +{ + other.parser = nullptr; } -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { - if (error()) { return error(); } - return first.is_integer(); +simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { + token = other.token; + parser = other.parser; + _string_buf_loc = other._string_buf_loc; + error = other.error; + _depth = other._depth; + _root = other._root; + _streaming = other._streaming; + other.parser = nullptr; + return *this; } -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { - if (error()) { return error(); } - return first.get_number_type(); + +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{false} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif } -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { - if (error()) { return error(); } - return first.get_number(); + +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{streaming} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif } -#if SIMDJSON_EXCEPTIONS -template -simdjson_inline simdjson_result::operator T() noexcept(false) { - static_assert(std::is_same::value == false, "You should not call get when T is a document"); - static_assert(std::is_same::value == false, "You should not call get when T is a document"); - if (error()) { throw simdjson_error(error()); } - return first.get(); +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + +inline void json_iterator::rewind() noexcept { + token.set_position( root_position() ); + logger::log_headers(); // We start again + _string_buf_loc = parser->string_buf.get(); + _depth = 1; } -simdjson_inline simdjson_result::operator ppc64::ondemand::array() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +inline bool json_iterator::balanced() const noexcept { + token_iterator ti(token); + int32_t count{0}; + ti.set_position( root_position() ); + while(ti.peek() <= peek_last()) { + switch (*ti.return_current_and_advance()) + { + case '[': case '{': + count++; + break; + case ']': case '}': + count--; + break; + default: + break; + } + } + return count == 0; } -simdjson_inline simdjson_result::operator ppc64::ondemand::object() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and parent_depth, which is a desired effect. The warning does not show up if the +// skip_child() function is not marked inline). +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { + if (depth() <= parent_depth) { return SUCCESS; } + switch (*return_current_and_advance()) { + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + + // For the first open array/object in a value, we've already incremented depth, so keep it the same + // We never stop at colon, but if we did, it wouldn't affect depth + case '[': case '{': case ':': + logger::log_start_value(*this, "skip"); + break; + // If there is a comma, we have just finished a value in an array/object, and need to get back in + case ',': + logger::log_value(*this, "skip"); + break; + // ] or } means we just finished a value and need to jump out of the array/object + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } +#if SIMDJSON_CHECK_EOF + // If there are no more tokens, the parent is incomplete. + if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + break; + case '"': + if(*peek() == ':') { + // We are at a key!!! + // This might happen if you just started an object and you skip it immediately. + // Performance note: it would be nice to get rid of this check as it is somewhat + // expensive. + // https://github.com/simdjson/simdjson/issues/1742 + logger::log_value(*this, "key"); + return_current_and_advance(); // eat up the ':' + break; // important!!! + } + simdjson_fallthrough; + // Anything else must be a scalar value + default: + // For the first scalar, we will have incremented depth already, so we decrement it here. + logger::log_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + } + + // Now that we've considered the first value, we only increment/decrement for arrays/objects + while (position() < end_position()) { + switch (*return_current_and_advance()) { + case '[': case '{': + logger::log_start_value(*this, "skip"); + _depth++; + break; + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + default: + logger::log_value(*this, "skip", ""); + break; + } + } + + return report_error(TAPE_ERROR, "not enough close braces"); } -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool json_iterator::at_root() const noexcept { + return position() == root_position(); } -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline bool json_iterator::is_single_token() const noexcept { + return parser->implementation->n_structural_indexes == 1; } -simdjson_inline simdjson_result::operator double() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline bool json_iterator::streaming() const noexcept { + return _streaming; } -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline token_position json_iterator::root_position() const noexcept { + return _root; } -simdjson_inline simdjson_result::operator ppc64::ondemand::raw_json_string() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); } -simdjson_inline simdjson_result::operator bool() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline void json_iterator::assert_at_root() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument + // has side effects that will be discarded. + SIMDJSON_ASSUME( token.position() == _root ); +#endif } -simdjson_inline simdjson_result::operator ppc64::ondemand::value() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { + assert_valid_position(token._position + required_tokens - 1); } -#endif -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { - if (error()) { return error(); } - return first.current_location(); +simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); + SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#else + (void)position; // Suppress unused parameter warning +#endif } -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { - if (error()) { return error(); } - return first.raw_json_token(); +simdjson_inline bool json_iterator::at_end() const noexcept { + return position() == end_position(); +} +simdjson_inline token_position json_iterator::end_position() const noexcept { + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + return &parser->implementation->structural_indexes[n_structural_indexes]; } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); +inline std::string json_iterator::to_string() const noexcept { + if( !is_alive() ) { return "dead json_iterator instance"; } + const char * current_structural = reinterpret_cast(token.peek()); + return std::string("json_iterator [ depth : ") + std::to_string(_depth) + + std::string(", structural : '") + std::string(current_structural,1) + + std::string("', offset : ") + std::to_string(token.current_offset()) + + std::string("', error : ") + error_message(error) + + std::string(" ]"); } -simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { - if (error()) { - return error(); +inline simdjson_result json_iterator::current_location() const noexcept { + if (!is_alive()) { // Unrecoverable error + if (!at_root()) { + return reinterpret_cast(token.peek(-1)); + } else { + return reinterpret_cast(token.peek()); + } } - return first.at_path(json_path); + if (at_end()) { + return OUT_OF_BOUNDS; + } + return reinterpret_cast(token.peek()); } -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H -/* end file simdjson/generic/ondemand/document-inl.h for ppc64 */ -/* including simdjson/generic/ondemand/document_stream-inl.h for ppc64: #include "simdjson/generic/ondemand/document_stream-inl.h" */ -/* begin file simdjson/generic/ondemand/document_stream-inl.h for ppc64 */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -#include -#include +simdjson_inline bool json_iterator::is_alive() const noexcept { + return parser; +} -namespace simdjson { -namespace ppc64 { -namespace ondemand { +simdjson_inline void json_iterator::abandon() noexcept { + parser = nullptr; + _depth = 0; +} -#ifdef SIMDJSON_THREADS_ENABLED +simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif // SIMDJSON_CHECK_EOF + return token.return_current_and_advance(); +} -inline void stage1_worker::finish() { - // After calling "run" someone would call finish() to wait - // for the end of the processing. - // This function will wait until either the thread has done - // the processing or, else, the destructor has been called. - std::unique_lock lock(locking_mutex); - cond_var.wait(lock, [this]{return has_work == false;}); +simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { + // deliberately done without safety guard: + return token.peek(); } -inline stage1_worker::~stage1_worker() { - // The thread may never outlive the stage1_worker instance - // and will always be stopped/joined before the stage1_worker - // instance is gone. - stop_thread(); +simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // SIMDJSON_CHECK_EOF + return token.peek(delta); } -inline void stage1_worker::start_thread() { - std::unique_lock lock(locking_mutex); - if(thread.joinable()) { - return; // This should never happen but we never want to create more than one thread. - } - thread = std::thread([this]{ - while(true) { - std::unique_lock thread_lock(locking_mutex); - // We wait for either "run" or "stop_thread" to be called. - cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); - // If, for some reason, the stop_thread() method was called (i.e., the - // destructor of stage1_worker is called, then we want to immediately destroy - // the thread (and not do any more processing). - if(!can_work) { - break; - } - this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, - this->_next_batch_start); - this->has_work = false; - // The condition variable call should be moved after thread_lock.unlock() for performance - // reasons but thread sanitizers may report it as a data race if we do. - // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock - cond_var.notify_one(); // will notify "finish" - thread_lock.unlock(); - } - } - ); +simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // #if SIMDJSON_CHECK_EOF + return token.peek_length(delta); } +simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { + // todo: currently we require end-of-string buffering, but the following + // assert_valid_position should be turned on if/when we lift that condition. + // assert_valid_position(position); + // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF + // is ON by default, we have no choice but to disable it for real with a comment. + return token.peek(position); +} -inline void stage1_worker::stop_thread() { - std::unique_lock lock(locking_mutex); - // We have to make sure that all locks can be released. - can_work = false; - has_work = false; - cond_var.notify_all(); - lock.unlock(); - if(thread.joinable()) { - thread.join(); - } +simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_length(position); +} +simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_root_length(position); } -inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { - std::unique_lock lock(locking_mutex); - owner = ds; - _next_batch_start = next_batch_start; - stage1_thread_parser = stage1; - has_work = true; - // The condition variable call should be moved after thread_lock.unlock() for performance - // reasons but thread sanitizers may report it as a data race if we do. - // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock - cond_var.notify_one(); // will notify the thread lock that we have work - lock.unlock(); +simdjson_inline token_position json_iterator::last_position() const noexcept { + // The following line fails under some compilers... + // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); + // since it has side-effects. + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + SIMDJSON_ASSUME(n_structural_indexes > 0); + return &parser->implementation->structural_indexes[n_structural_indexes - 1]; +} +simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { + return token.peek(last_position()); } -#endif // SIMDJSON_THREADS_ENABLED +simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { + SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); + SIMDJSON_ASSUME(_depth == parent_depth + 1); + _depth = parent_depth; +} -simdjson_inline document_stream::document_stream( - ondemand::parser &_parser, - const uint8_t *_buf, - size_t _len, - size_t _batch_size, - bool _allow_comma_separated -) noexcept - : parser{&_parser}, - buf{_buf}, - len{_len}, - batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? MINIMAL_BATCH_SIZE : _batch_size}, - allow_comma_separated{_allow_comma_separated}, - error{SUCCESS} - #ifdef SIMDJSON_THREADS_ENABLED - , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change - #endif -{ -#ifdef SIMDJSON_THREADS_ENABLED - if(worker.get() == nullptr) { - error = MEMALLOC; - } -#endif +simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); + _depth = child_depth; } -simdjson_inline document_stream::document_stream() noexcept - : parser{nullptr}, - buf{nullptr}, - len{0}, - batch_size{0}, - allow_comma_separated{false}, - error{UNINITIALIZED} - #ifdef SIMDJSON_THREADS_ENABLED - , use_thread(false) - #endif -{ +simdjson_inline depth_t json_iterator::depth() const noexcept { + return _depth; } -simdjson_inline document_stream::~document_stream() noexcept -{ - #ifdef SIMDJSON_THREADS_ENABLED - worker.reset(); - #endif +simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { + return _string_buf_loc; } -inline size_t document_stream::size_in_bytes() const noexcept { - return len; +simdjson_warn_unused simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); + logger::log_error(*this, message); + error = _error; + return error; } -inline size_t document_stream::truncated_bytes() const noexcept { - if(error == CAPACITY) { return len - batch_start; } - return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; +simdjson_inline token_position json_iterator::position() const noexcept { + return token.position(); } -simdjson_inline document_stream::iterator::iterator() noexcept - : stream{nullptr}, finished{true} { +simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape(in, _string_buf_loc, allow_replacement); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else + return parser->unescape(in, _string_buf_loc, allow_replacement); +#endif } -simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept - : stream{_stream}, finished{is_end} { +simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape_wobbly(in, _string_buf_loc); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else + return parser->unescape_wobbly(in, _string_buf_loc); +#endif } -simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { - return simdjson_result(stream->doc, stream->error); +simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); +#if SIMDJSON_DEVELOPMENT_CHECKS +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); + SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); +#endif +#endif + token.set_position(position); + _depth = child_depth; } -simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { - // If there is an error, then we want the iterator - // to be finished, no matter what. (E.g., we do not - // keep generating documents with errors, or go beyond - // a document with errors.) - // - // Users do not have to call "operator*()" when they use operator++, - // so we need to end the stream in the operator++ function. - // - // Note that setting finished = true is essential otherwise - // we would enter an infinite loop. - if (stream->error) { finished = true; } - // Note that stream->error() is guarded against error conditions - // (it will immediately return if stream->error casts to false). - // In effect, this next function does nothing when (stream->error) - // is true (hence the risk of an infinite loop). - stream->next(); - // If that was the last document, we're finished. - // It is the only type of error we do not want to appear - // in operator*. - if (stream->error == EMPTY) { finished = true; } - // If we had any other kind of error (not EMPTY) then we want - // to pass it along to the operator* and we cannot mark the result - // as "finished" just yet. - return *this; +simdjson_warn_unused simdjson_inline error_code json_iterator::consume_character(char c) noexcept { + if (*peek() == c) { + return_current_and_advance(); + return SUCCESS; + } + return TAPE_ERROR; } -simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { - return finished != other.finished; +#if SIMDJSON_DEVELOPMENT_CHECKS + +simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; } -simdjson_inline document_stream::iterator document_stream::begin() noexcept { - start(); - // If there are no documents, we're finished. - return iterator(this, error == EMPTY); +simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } } -simdjson_inline document_stream::iterator document_stream::end() noexcept { - return iterator(this, true); +#endif + + +simdjson_warn_unused simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); + logger::log_error(*this, message); + return _error; } -inline void document_stream::start() noexcept { - if (error) { return; } - error = parser->allocate(batch_size); - if (error) { return; } - // Always run the first stage 1 parse immediately - batch_start = 0; - error = run_stage1(*parser, batch_start); - while(error == EMPTY) { - // In exceptional cases, we may start with an empty block - batch_start = next_batch_start(); - if (batch_start >= len) { return; } - error = run_stage1(*parser, batch_start); - } - if (error) { return; } - doc_index = batch_start; - doc = document(json_iterator(&buf[batch_start], parser)); - doc.iter._streaming = true; - #ifdef SIMDJSON_THREADS_ENABLED - if (use_thread && next_batch_start() < len) { - // Kick off the first thread on next batch if needed - error = stage1_thread_parser.allocate(batch_size); - if (error) { return; } - worker->start_thread(); - start_stage1_thread(); - if (error) { return; } +simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { + // This function is not expected to be called in performance-sensitive settings. + // Let us guard against silly cases: + if((N < max_len) || (N == 0)) { return false; } + // Copy to the buffer. + std::memcpy(tmpbuf, json, max_len); + if(N > max_len) { // We pad whatever remains with ' '. + std::memset(tmpbuf + max_len, ' ', N - max_len); } - #endif // SIMDJSON_THREADS_ENABLED + return true; } -inline void document_stream::next() noexcept { - // We always enter at once once in an error condition. - if (error) { return; } - next_document(); - if (error) { return; } - auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); - doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson - // Check if at end of structural indexes (i.e. at end of batch) - if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { - error = EMPTY; - // Load another batch (if available) - while (error == EMPTY) { - batch_start = next_batch_start(); - if (batch_start >= len) { break; } - #ifdef SIMDJSON_THREADS_ENABLED - if(use_thread) { - load_from_stage1_thread(); - } else { - error = run_stage1(*parser, batch_start); - } - #else - error = run_stage1(*parser, batch_start); - #endif - /** - * Whenever we move to another window, we need to update all pointers to make - * it appear as if the input buffer started at the beginning of the window. - * - * Take this input: - * - * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] - * - * Say you process the following window... - * - * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' - * - * When you do so, the json_iterator has a pointer at the beginning of the memory region - * (pointing at the beginning of '{"z"...'. - * - * When you move to the window that starts at... - * - * '[7, 10, 9] [15, 11, 12, 13] ... - * - * then it is not sufficient to just run stage 1. You also need to re-anchor the - * json_iterator so that it believes we are starting at '[7, 10, 9]...'. - * - * Under the DOM front-end, this gets done automatically because the parser owns - * the pointer the data, and when you call stage1 and then stage2 on the same - * parser, then stage2 will run on the pointer acquired by stage1. - * - * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that - * we used. But json_iterator has no callback when stage1 is called on the parser. - * In fact, I think that the parser is unaware of json_iterator. - * - * - * So we need to re-anchor the json_iterator after each call to stage 1 so that - * all of the pointers are in sync. - */ - doc.iter = json_iterator(&buf[batch_start], parser); - doc.iter._streaming = true; - /** - * End of resync. - */ +namespace simdjson { - if (error) { continue; } // If the error was EMPTY, we may want to load another batch. - doc_index = batch_start; +simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::json_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/json_iterator-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/json_type-inl.h for ppc64: #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* begin file simdjson/generic/ondemand/json_type-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { + switch (type) { + case json_type::array: out << "array"; break; + case json_type::object: out << "object"; break; + case json_type::number: out << "number"; break; + case json_type::string: out << "string"; break; + case json_type::boolean: out << "boolean"; break; + case json_type::null: out << "null"; break; + default: SIMDJSON_UNREACHABLE(); } - } + return out; } -inline void document_stream::next_document() noexcept { - // Go to next place where depth=0 (document depth) - error = doc.iter.skip_child(0); - if (error) { return; } - // Always set depth=1 at the start of document - doc.iter._depth = 1; - // consume comma if comma separated is allowed - if (allow_comma_separated) { doc.iter.consume_character(','); } - // Resets the string buffer at the beginning, thus invalidating the strings. - doc.iter._string_buf_loc = parser->string_buf.get(); - doc.iter._root = doc.iter.position(); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { + return out << type.value(); } +#endif -inline size_t document_stream::next_batch_start() const noexcept { - return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; + + +simdjson_inline number_type number::get_number_type() const noexcept { + return type; } -inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { - // This code only updates the structural index in the parser, it does not update any json_iterator - // instance. - size_t remaining = len - _batch_start; - if (remaining <= batch_size) { - return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); - } else { - return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); - } +simdjson_inline bool number::is_uint64() const noexcept { + return get_number_type() == number_type::unsigned_integer; } -simdjson_inline size_t document_stream::iterator::current_index() const noexcept { - return stream->doc_index; +simdjson_inline uint64_t number::get_uint64() const noexcept { + return payload.unsigned_integer; } -simdjson_inline std::string_view document_stream::iterator::source() const noexcept { - auto depth = stream->doc.iter.depth(); - auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); +simdjson_inline number::operator uint64_t() const noexcept { + return get_uint64(); +} - // If at root, process the first token to determine if scalar value - if (stream->doc.iter.at_root()) { - switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { - case '{': case '[': // Depth=1 already at start of document - break; - case '}': case ']': - depth--; - break; - default: // Scalar value document - // TODO: We could remove trailing whitespaces - // This returns a string spanning from start of value to the beginning of the next document (excluded) - { - auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; - // normally the length would be next_index - current_index() - 1, except for the last document - size_t svlen = next_index - current_index(); - const char *start = reinterpret_cast(stream->buf) + current_index(); - while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { - svlen--; - } - return std::string_view(start, svlen); - } - } - cur_struct_index++; - } +simdjson_inline bool number::is_int64() const noexcept { + return get_number_type() == number_type::signed_integer; +} - while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { - switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { - case '{': case '[': - depth++; - break; - case '}': case ']': - depth--; - break; - } - if (depth == 0) { break; } - cur_struct_index++; - } +simdjson_inline int64_t number::get_int64() const noexcept { + return payload.signed_integer; +} - return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; +simdjson_inline number::operator int64_t() const noexcept { + return get_int64(); } -inline error_code document_stream::iterator::error() const noexcept { - return stream->error; +simdjson_inline bool number::is_double() const noexcept { + return get_number_type() == number_type::floating_point_number; } -#ifdef SIMDJSON_THREADS_ENABLED +simdjson_inline double number::get_double() const noexcept { + return payload.floating_point_number; +} -inline void document_stream::load_from_stage1_thread() noexcept { - worker->finish(); - // Swap to the parser that was loaded up in the thread. Make sure the parser has - // enough memory to swap to, as well. - std::swap(stage1_thread_parser,*parser); - error = stage1_thread_error; - if (error) { return; } +simdjson_inline number::operator double() const noexcept { + return get_double(); +} - // If there's anything left, start the stage 1 thread! - if (next_batch_start() < len) { - start_stage1_thread(); +simdjson_inline double number::as_double() const noexcept { + if(is_double()) { + return payload.floating_point_number; + } + if(is_int64()) { + return double(payload.signed_integer); } + return double(payload.unsigned_integer); +} + +simdjson_inline void number::append_s64(int64_t value) noexcept { + payload.signed_integer = value; + type = number_type::signed_integer; } -inline void document_stream::start_stage1_thread() noexcept { - // we call the thread on a lambda that will update - // this->stage1_thread_error - // there is only one thread that may write to this value - // TODO this is NOT exception-safe. - this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error - size_t _next_batch_start = this->next_batch_start(); +simdjson_inline void number::append_u64(uint64_t value) noexcept { + payload.unsigned_integer = value; + type = number_type::unsigned_integer; +} - worker->run(this, & this->stage1_thread_parser, _next_batch_start); +simdjson_inline void number::append_double(double value) noexcept { + payload.floating_point_number = value; + type = number_type::floating_point_number; } -#endif // SIMDJSON_THREADS_ENABLED +simdjson_inline void number::skip_double() noexcept { + type = number_type::floating_point_number; +} } // namespace ondemand } // namespace ppc64 @@ -85244,588 +103283,719 @@ inline void document_stream::start_stage1_thread() noexcept { namespace simdjson { -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base(error) -{ -} -simdjson_inline simdjson_result::simdjson_result( - ppc64::ondemand::document_stream &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ -} +simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::json_type &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} -} +} // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H -/* end file simdjson/generic/ondemand/document_stream-inl.h for ppc64 */ -/* including simdjson/generic/ondemand/field-inl.h for ppc64: #include "simdjson/generic/ondemand/field-inl.h" */ -/* begin file simdjson/generic/ondemand/field-inl.h for ppc64 */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +/* end file simdjson/generic/ondemand/json_type-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/logger-inl.h for ppc64: #include "simdjson/generic/ondemand/logger-inl.h" */ +/* begin file simdjson/generic/ondemand/logger-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#include +#include + namespace simdjson { namespace ppc64 { namespace ondemand { +namespace logger { -// clang 6 does not think the default constructor can be noexcept, so we make it explicit -simdjson_inline field::field() noexcept : std::pair() {} +static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; +static constexpr const int LOG_EVENT_LEN = 20; +static constexpr const int LOG_BUFFER_LEN = 30; +static constexpr const int LOG_SMALL_BUFFER_LEN = 10; +static int log_depth = 0; // Not threadsafe. Log only. -simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept - : std::pair(key, std::forward(value)) -{ +// Helper to turn unprintable or newline characters into spaces +static inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } } -simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { - raw_json_string key; - SIMDJSON_TRY( parent_iter.field_key().get(key) ); - SIMDJSON_TRY( parent_iter.field_value() ); - return field::start(parent_iter, key); +template +static inline std::string string_format(const std::string& format, const Args&... args) +{ + SIMDJSON_PUSH_DISABLE_ALL_WARNINGS + int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; + auto size = static_cast(size_s); + if (size <= 0) return std::string(); + std::unique_ptr buf(new char[size]); + std::snprintf(buf.get(), size, format.c_str(), args...); + SIMDJSON_POP_DISABLE_WARNINGS + return std::string(buf.get(), buf.get() + size - 1); } -simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { - return field(key, parent_iter.child()); +static inline log_level get_log_level_from_env() +{ + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + char *lvl = getenv("SIMDJSON_LOG_LEVEL"); + SIMDJSON_POP_DISABLE_WARNINGS + if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } + return log_level::info; } -simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. - simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); - first.consume(); - return answer; +static inline log_level log_threshold() +{ + static log_level threshold = get_log_level_from_env(); + return threshold; } -template -simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { - std::string_view key; - SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); - receiver = key; - return SUCCESS; +static inline bool should_log(log_level level) +{ + return level >= log_threshold(); } -simdjson_inline raw_json_string field::key() const noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. - return first; +inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); } - -simdjson_inline std::string_view field::key_raw_json_token() const noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. - return std::string_view(reinterpret_cast(first.buf-1), second.iter._json_iter->token.peek(-1) - first.buf + 1); +inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "", type, detail, log_level::info); } - -simdjson_inline std::string_view field::escaped_key() const noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. - auto end_quote = second.iter._json_iter->token.peek(-1); - while(*end_quote != '"') end_quote--; - return std::string_view(reinterpret_cast(first.buf), end_quote - first.buf); +inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); } -simdjson_inline value &field::value() & noexcept { - return second; +inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "+", type, detail, log_level::info); + if (LOG_ENABLED) { log_depth++; } } - -simdjson_inline value field::value() && noexcept { - return std::forward(*this).second; +inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_line(iter, "+", type, "", delta, depth_delta, log_level::info); + if (LOG_ENABLED) { log_depth++; } } -} // namespace ondemand -} // namespace ppc64 -} // namespace simdjson +inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + if (LOG_ENABLED) { log_depth--; } + log_line(iter, "-", type, "", delta, depth_delta, log_level::info); +} -namespace simdjson { +inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); +} +inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { + log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); +} -simdjson_inline simdjson_result::simdjson_result( - ppc64::ondemand::field &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ +inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_event(iter.json_iter(), type, detail, delta, depth_delta); } -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base(error) -{ + +inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_value(iter.json_iter(), type, detail, delta, depth_delta); } -simdjson_inline simdjson_result simdjson_result::key() noexcept { - if (error()) { return error(); } - return first.key(); +inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_start_value(iter.json_iter(), type, delta, depth_delta); } -simdjson_inline simdjson_result simdjson_result::key_raw_json_token() noexcept { - if (error()) { return error(); } - return first.key_raw_json_token(); +inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_end_value(iter.json_iter(), type, delta, depth_delta); } -simdjson_inline simdjson_result simdjson_result::escaped_key() noexcept { - if (error()) { return error(); } - return first.escaped_key(); +inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_error(iter.json_iter(), error, detail, delta, depth_delta); } -simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.unescaped_key(allow_replacement); +inline void log_headers() noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(log_level::info))) { + // Technically a static variable is not thread-safe, but if you are using threads and logging... well... + static bool displayed_hint{false}; + log_depth = 0; + printf("\n"); + if (!displayed_hint) { + // We only print this helpful header once. + printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); + printf("# +array says 'this is where we were when we discovered the start array'\n"); + printf( + "# -array says 'this is where we were when we ended the array'\n"); + printf("# skip says 'this is a structural or value I am skipping'\n"); + printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); + printf("#\n"); + printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); + printf("# in addition to the depth being displayed.\n"); + printf("#\n"); + printf("# Every token in the document has a single depth determined by the tokens before it,\n"); + printf("# and is not affected by what the token actually is.\n"); + printf("#\n"); + printf("# Not all structural elements are presented as tokens in the logs.\n"); + printf("#\n"); + printf("# We never give control to the user within an empty array or an empty object.\n"); + printf("#\n"); + printf("# Inside an array, having a depth greater than the array's depth means that\n"); + printf("# we are pointing inside a value.\n"); + printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); + printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); + displayed_hint = true; + } + printf("\n"); + printf("| %-*s ", LOG_EVENT_LEN, "Event"); + printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); + printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); + // printf("| %-*s ", 5, "Next#"); + printf("| %-*s ", 5, "Depth"); + printf("| Detail "); + printf("|\n"); + + printf("|%.*s", LOG_EVENT_LEN + 2, DASHES); + printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); + printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); + // printf("|%.*s", 5+2, DASHES); + printf("|%.*s", 5 + 2, DASHES); + printf("|--------"); + printf("|\n"); + fflush(stdout); + } + } } -template -simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.unescaped_key(receiver, allow_replacement); +template +inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { + log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); } -simdjson_inline simdjson_result simdjson_result::value() noexcept { - if (error()) { return error(); } - return std::move(first.value()); +template +inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(level))) { + const int indent = depth * 2; + const auto buf = iter.token.buf; + auto msg = string_format(title, std::forward(args)...); + printf("| %*s%s%-*s ", indent, "", title_prefix, + LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); + { + // Print the current structural. + printf("| "); + // Before we begin, the index might point right before the document. + // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 + if (index < iter._root) { + printf("%*s", LOG_BUFFER_LEN, ""); + } else { + auto current_structural = &buf[*index]; + for (int i = 0; i < LOG_BUFFER_LEN; i++) { + printf("%c", printable_char(current_structural[i])); + } + } + printf(" "); + } + { + // Print the next structural. + printf("| "); + auto next_structural = &buf[*(index + 1)]; + for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { + printf("%c", printable_char(next_structural[i])); + } + printf(" "); + } + // printf("| %5u ", *(index+1)); + printf("| %5i ", depth); + printf("| %6.*s ", int(detail.size()), detail.data()); + printf("|\n"); + fflush(stdout); + } + } } +} // namespace logger +} // namespace ondemand +} // namespace ppc64 } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H -/* end file simdjson/generic/ondemand/field-inl.h for ppc64 */ -/* including simdjson/generic/ondemand/json_iterator-inl.h for ppc64: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/json_iterator-inl.h for ppc64 */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +/* end file simdjson/generic/ondemand/logger-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/object-inl.h for ppc64: #include "simdjson/generic/ondemand/object-inl.h" */ +/* begin file simdjson/generic/ondemand/object-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/jsonpathutil.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_string_builder.h" // for constevalutil::fixed_string */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { namespace ondemand { -simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept - : token(std::forward(other.token)), - parser{other.parser}, - _string_buf_loc{other._string_buf_loc}, - error{other.error}, - _depth{other._depth}, - _root{other._root}, - _streaming{other._streaming} -{ - other.parser = nullptr; -} -simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { - token = other.token; - parser = other.parser; - _string_buf_loc = other._string_buf_loc; - error = other.error; - _depth = other._depth; - _root = other._root; - _streaming = other._streaming; - other.parser = nullptr; - return *this; +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); } - -simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept - : token(buf, &_parser->implementation->structural_indexes[0]), - parser{_parser}, - _string_buf_loc{parser->string_buf.get()}, - _depth{1}, - _root{parser->implementation->structural_indexes.get()}, - _streaming{false} - -{ - logger::log_headers(); -#if SIMDJSON_CHECK_EOF - assert_more_tokens(); -#endif +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); } - -#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept - : token(buf, &_parser->implementation->structural_indexes[0]), - parser{_parser}, - _string_buf_loc{parser->string_buf.get()}, - _depth{1}, - _root{parser->implementation->structural_indexes.get()}, - _streaming{streaming} - -{ - logger::log_headers(); -#if SIMDJSON_CHECK_EOF - assert_more_tokens(); -#endif +simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { + return find_field_unordered(key); } -#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON - -inline void json_iterator::rewind() noexcept { - token.set_position( root_position() ); - logger::log_headers(); // We start again - _string_buf_loc = parser->string_buf.get(); - _depth = 1; +simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { + return std::forward(*this).find_field_unordered(key); } - -inline bool json_iterator::balanced() const noexcept { - token_iterator ti(token); - int32_t count{0}; - ti.set_position( root_position() ); - while(ti.peek() <= peek_last()) { - switch (*ti.return_current_and_advance()) - { - case '[': case '{': - count++; - break; - case ']': case '}': - count--; - break; - default: - break; - } +simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; } - return count == 0; + return value(iter.child()); } - - -// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller -// relating depth and parent_depth, which is a desired effect. The warning does not show up if the -// skip_child() function is not marked inline). -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { - if (depth() <= parent_depth) { return SUCCESS; } - switch (*return_current_and_advance()) { - // TODO consider whether matching braces is a requirement: if non-matching braces indicates - // *missing* braces, then future lookups are not in the object/arrays they think they are, - // violating the rule "validate enough structure that the user can be confident they are - // looking at the right values." - // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth - - // For the first open array/object in a value, we've already incremented depth, so keep it the same - // We never stop at colon, but if we did, it wouldn't affect depth - case '[': case '{': case ':': - logger::log_start_value(*this, "skip"); - break; - // If there is a comma, we have just finished a value in an array/object, and need to get back in - case ',': - logger::log_value(*this, "skip"); - break; - // ] or } means we just finished a value and need to jump out of the array/object - case ']': case '}': - logger::log_end_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } -#if SIMDJSON_CHECK_EOF - // If there are no more tokens, the parent is incomplete. - if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } -#endif // SIMDJSON_CHECK_EOF - break; - case '"': - if(*peek() == ':') { - // We are at a key!!! - // This might happen if you just started an object and you skip it immediately. - // Performance note: it would be nice to get rid of this check as it is somewhat - // expensive. - // https://github.com/simdjson/simdjson/issues/1742 - logger::log_value(*this, "key"); - return_current_and_advance(); // eat up the ':' - break; // important!!! - } - simdjson_fallthrough; - // Anything else must be a scalar value - default: - // For the first scalar, we will have incremented depth already, so we decrement it here. - logger::log_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } - break; - } - - // Now that we've considered the first value, we only increment/decrement for arrays/objects - while (position() < end_position()) { - switch (*return_current_and_advance()) { - case '[': case '{': - logger::log_start_value(*this, "skip"); - _depth++; - break; - // TODO consider whether matching braces is a requirement: if non-matching braces indicates - // *missing* braces, then future lookups are not in the object/arrays they think they are, - // violating the rule "validate enough structure that the user can be confident they are - // looking at the right values." - // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth - case ']': case '}': - logger::log_end_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } - break; - default: - logger::log_value(*this, "skip", ""); - break; - } +simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; } - - return report_error(TAPE_ERROR, "not enough close braces"); + return value(iter.child()); } -SIMDJSON_POP_DISABLE_WARNINGS - -simdjson_inline bool json_iterator::at_root() const noexcept { - return position() == root_position(); +simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_object().error() ); + return object(iter); } - -simdjson_inline bool json_iterator::is_single_token() const noexcept { - return parser->implementation->n_structural_indexes == 1; +simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_root_object().error() ); + return object(iter); } - -simdjson_inline bool json_iterator::streaming() const noexcept { - return _streaming; +simdjson_warn_unused simdjson_inline error_code object::consume() noexcept { + if(iter.is_at_key()) { + /** + * whenever you are pointing at a key, calling skip_child() is + * unsafe because you will hit a string and you will assume that + * it is string value, and this mistake will lead you to make bad + * depth computation. + */ + /** + * We want to 'consume' the key. We could really + * just do _json_iter->return_current_and_advance(); at this + * point, but, for clarity, we will use the high-level API to + * eat the key. We assume that the compiler optimizes away + * most of the work. + */ + simdjson_unused raw_json_string actual_key; + auto error = iter.field_key().get(actual_key); + if (error) { iter.abandon(); return error; }; + // Let us move to the value while we are at it. + if ((error = iter.field_value())) { iter.abandon(); return error; } + } + auto error_skip = iter.json_iter().skip_child(iter.depth()-1); + if(error_skip) { iter.abandon(); } + return error_skip; } -simdjson_inline token_position json_iterator::root_position() const noexcept { - return _root; +simdjson_inline simdjson_result object::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + const uint8_t * final_point{iter._json_iter->peek()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } -simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { - SIMDJSON_ASSUME( _depth == 1 ); +simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.started_object().error() ); + return object(iter); } -simdjson_inline void json_iterator::assert_at_root() const noexcept { - SIMDJSON_ASSUME( _depth == 1 ); -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument - // has side effects that will be discarded. - SIMDJSON_ASSUME( token.position() == _root ); -#endif +simdjson_inline object object::resume(const value_iterator &iter) noexcept { + return iter; } -simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { - assert_valid_position(token._position + required_tokens - 1); +simdjson_inline object::object(const value_iterator &_iter) noexcept + : iter{_iter} +{ } -simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); - SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +simdjson_inline simdjson_result object::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } #endif + return object_iterator(iter); } - -simdjson_inline bool json_iterator::at_end() const noexcept { - return position() == end_position(); +simdjson_inline simdjson_result object::end() noexcept { + return object_iterator(iter); } -simdjson_inline token_position json_iterator::end_position() const noexcept { - uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; - return &parser->implementation->structural_indexes[n_structural_indexes]; + +inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result child; + + // If there is an escape character in the key, unescape it and then get the child. + size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = find_field(unescaped); // Take note find_field does not unescape keys when matching + } else { + child = find_field(key); + } + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; } -inline std::string json_iterator::to_string() const noexcept { - if( !is_alive() ) { return "dead json_iterator instance"; } - const char * current_structural = reinterpret_cast(token.peek()); - return std::string("json_iterator [ depth : ") + std::to_string(_depth) - + std::string(", structural : '") + std::string(current_structural,1) - + std::string("', offset : ") + std::to_string(token.current_offset()) - + std::string("', error : ") + error_message(error) - + std::string(" ]"); +inline simdjson_result object::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { + return INVALID_JSON_POINTER; + } + return at_pointer(json_pointer); } -inline simdjson_result json_iterator::current_location() const noexcept { - if (!is_alive()) { // Unrecoverable error - if (!at_root()) { - return reinterpret_cast(token.peek(-1)); +inline simdjson_result> object::at_path_with_wildcard(std::string_view json_path) noexcept { + std::vector result; + + auto result_pair = get_next_key_and_json_path(json_path); + std::string_view key = result_pair.first; + std::string_view remaining_path = result_pair.second; + // Handle when its the case for wildcard + if (key == "*") { + // Loop through each field in the object + for (auto field : *this) { + value val; + SIMDJSON_TRY(field.value().get(val)); + + if (remaining_path.empty()) { + result.push_back(std::move(val)); + } else { + auto nested_result = val.at_path_with_wildcard(remaining_path); + + if (nested_result.error()) { + return nested_result.error(); + } + // Extract and append all nested matches to our result + std::vector nested_vec; + SIMDJSON_TRY(std::move(nested_result).get(nested_vec)); + + result.insert(result.end(), + std::make_move_iterator(nested_vec.begin()), + std::make_move_iterator(nested_vec.end())); + } + } + return result; + } else { + value val; + SIMDJSON_TRY(find_field(key).get(val)); + + if (remaining_path.empty()) { + result.push_back(std::move(val)); + return result; } else { - return reinterpret_cast(token.peek()); + return val.at_path_with_wildcard(remaining_path); } } - if (at_end()) { - return OUT_OF_BOUNDS; - } - return reinterpret_cast(token.peek()); } -simdjson_inline bool json_iterator::is_alive() const noexcept { - return parser; +simdjson_inline simdjson_result object::count_fields() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the object after counting the number of elements. + iter.reset_object(); + return count; } -simdjson_inline void json_iterator::abandon() noexcept { - parser = nullptr; - _depth = 0; +simdjson_inline simdjson_result object::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_object().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; } -simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { -#if SIMDJSON_CHECK_EOF - assert_more_tokens(); -#endif // SIMDJSON_CHECK_EOF - return token.return_current_and_advance(); +simdjson_inline simdjson_result object::reset() & noexcept { + return iter.reset_object(); } -simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { - // deliberately done without safety guard: - return token.peek(); -} +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION -simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_more_tokens(delta+1); -#endif // SIMDJSON_CHECK_EOF - return token.peek(delta); -} +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code object::extract_into(T& out) & noexcept { + // Helper to check if a field name matches any of the requested fields + auto should_extract = [](std::string_view field_name) constexpr -> bool { + return ((FieldNames.view() == field_name) || ...); + }; -simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_more_tokens(delta+1); -#endif // #if SIMDJSON_CHECK_EOF - return token.peek_length(delta); -} + // Iterate through all members of T using reflection + template for (constexpr auto mem : std::define_static_array( + std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + + // Only extract this field if it's in our list of requested fields + if constexpr (should_extract(key)) { + // Try to find and extract the field + if constexpr (concepts::optional_type) { + // For optional fields, it's ok if they're missing + auto field_result = find_field_unordered(key); + if (!field_result.error()) { + auto error = field_result.get(out.[:mem:]); + if (error && error != NO_SUCH_FIELD) { + return error; + } + } else if (field_result.error() != NO_SUCH_FIELD) { + return field_result.error(); + } else { + out.[:mem:].reset(); + } + } else { + // For required fields (in the requested list), fail if missing + SIMDJSON_TRY((*this)[key].get(out.[:mem:])); + } + } + } + }; -simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { - // todo: currently we require end-of-string buffering, but the following - // assert_valid_position should be turned on if/when we lift that condition. - // assert_valid_position(position); - // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF - // is ON by default, we have no choice but to disable it for real with a comment. - return token.peek(position); + return SUCCESS; } -simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_valid_position(position); -#endif // SIMDJSON_CHECK_EOF - return token.peek_length(position); +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); } -simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_valid_position(position); -#endif // SIMDJSON_CHECK_EOF - return token.peek_root_length(position); +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); } - -simdjson_inline token_position json_iterator::last_position() const noexcept { - // The following line fails under some compilers... - // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); - // since it has side-effects. - uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; - SIMDJSON_ASSUME(n_structural_indexes > 0); - return &parser->implementation->structural_indexes[n_structural_indexes - 1]; +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); } -simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { - return token.peek(last_position()); +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field_unordered(key); } - -simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { - SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); - SIMDJSON_ASSUME(_depth == parent_depth + 1); - _depth = parent_depth; +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; } - -simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { - SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); - SIMDJSON_ASSUME(_depth == child_depth - 1); - _depth = child_depth; +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first)[key]; } - -simdjson_inline depth_t json_iterator::depth() const noexcept { - return _depth; +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field(key); } -simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { - return _string_buf_loc; +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); } -simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { - SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); - logger::log_error(*this, message); - error = _error; - return error; +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); } -simdjson_inline token_position json_iterator::position() const noexcept { - return token.position(); +simdjson_inline simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path_with_wildcard(json_path); } -simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { -#if SIMDJSON_DEVELOPMENT_CHECKS - auto result = parser->unescape(in, _string_buf_loc, allow_replacement); - SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); - return result; -#else - return parser->unescape(in, _string_buf_loc, allow_replacement); -#endif +inline simdjson_result simdjson_result::reset() noexcept { + if (error()) { return error(); } + return first.reset(); } -simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { -#if SIMDJSON_DEVELOPMENT_CHECKS - auto result = parser->unescape_wobbly(in, _string_buf_loc); - SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); - return result; -#else - return parser->unescape_wobbly(in, _string_buf_loc); -#endif +inline simdjson_result simdjson_result::is_empty() noexcept { + if (error()) { return error(); } + return first.is_empty(); } -simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { - SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); - SIMDJSON_ASSUME(_depth == child_depth - 1); -#if SIMDJSON_DEVELOPMENT_CHECKS -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); - SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); -#endif -#endif - token.set_position(position); - _depth = child_depth; +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); } -simdjson_inline error_code json_iterator::consume_character(char c) noexcept { - if (*peek() == c) { - return_current_and_advance(); - return SUCCESS; - } - return TAPE_ERROR; +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); } -#if SIMDJSON_DEVELOPMENT_CHECKS +} // namespace simdjson -simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { - SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); - return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; -} +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +/* end file simdjson/generic/ondemand/object-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/object_iterator-inl.h for ppc64: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/object_iterator-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H -simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { - SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); - if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } -} +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#endif +namespace simdjson { +namespace ppc64 { +namespace ondemand { +// +// object_iterator +// -simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { - SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); - logger::log_error(*this, message); - return _error; +simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result object_iterator::operator*() noexcept { + error_code error = iter.error(); + if (error) { iter.abandon(); return error; } + auto result = field::start(iter); + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (result.error()) { iter.abandon(); } + return result; +} +simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { + return iter.is_open(); } +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline object_iterator &object_iterator::operator++() noexcept { + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error -simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { - // This function is not expected to be called in performance-sensitive settings. - // Let us guard against silly cases: - if((N < max_len) || (N == 0)) { return false; } - // Copy to the buffer. - std::memcpy(tmpbuf, json, max_len); - if(N > max_len) { // We pad whatever remains with ' '. - std::memset(tmpbuf + max_len, ' ', N - max_len); - } - return true; + simdjson_unused error_code error; + if ((error = iter.skip_child() )) { return *this; } + + simdjson_unused bool has_value; + if ((error = iter.has_next_field().get(has_value) )) { return *this; }; + return *this; } +SIMDJSON_POP_DISABLE_WARNINGS + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the object is first found and the iterator is just past the {. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the , or } before the next value. In this state, +// depth == iter.depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter.depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the object iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an +// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter.depth == depth, and at_start == false. +// +// Errors that occur while reading a field to give to the user (such as when the key is not a +// string or the field is missing a colon) are yielded immediately. Depth is then decremented, +// moving to the Finished state without transitioning through an Error state at all. +// +// ## Terminal State +// +// The terminal state has iter.depth < depth. at_start is always false. +// +// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. +// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. +// } // namespace ondemand } // namespace ppc64 @@ -85833,1516 +104003,1923 @@ simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const ui namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::json_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result( + ppc64::ondemand::object_iterator &&value +) noexcept + : implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +// Checks for ']' and ',' +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++first; + return *this; +} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/json_iterator-inl.h for ppc64 */ -/* including simdjson/generic/ondemand/json_type-inl.h for ppc64: #include "simdjson/generic/ondemand/json_type-inl.h" */ -/* begin file simdjson/generic/ondemand/json_type-inl.h for ppc64 */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/object_iterator-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/parser-inl.h for ppc64: #include "simdjson/generic/ondemand/parser-inl.h" */ +/* begin file simdjson/generic/ondemand/parser-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { namespace ondemand { -inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { - switch (type) { - case json_type::array: out << "array"; break; - case json_type::object: out << "object"; break; - case json_type::number: out << "number"; break; - case json_type::string: out << "string"; break; - case json_type::boolean: out << "boolean"; break; - case json_type::null: out << "null"; break; - default: SIMDJSON_UNREACHABLE(); - } - return out; +simdjson_inline parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity} { } -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { - return out << type.value(); +simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { + if (new_capacity > max_capacity()) { return CAPACITY; } + if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + + // string_capacity copied from document::allocate + _capacity = 0; + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); +#if SIMDJSON_DEVELOPMENT_CHECKS + start_positions.reset(new (std::nothrow) token_position[new_max_depth]); +#endif + if (implementation) { + SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); + SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + } else { + SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); + } + _capacity = new_capacity; + _max_depth = new_max_depth; + return SUCCESS; +} +#if SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { + return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); } #endif +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } + json.remove_utf8_bom(); -simdjson_inline number_type number::get_number_type() const noexcept { - return type; + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return document::start({ reinterpret_cast(json.data()), this }); } -simdjson_inline bool number::is_uint64() const noexcept { - return get_number_type() == number_type::unsigned_integer; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); + if (err) { + if (err != UNCLOSED_STRING) + return err; + } + return document::start({ reinterpret_cast(json.data()), this, true }); } +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -simdjson_inline uint64_t number::get_uint64() const noexcept { - return payload.unsigned_integer; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); } -simdjson_inline number::operator uint64_t() const noexcept { - return get_uint64(); +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); } -simdjson_inline bool number::is_int64() const noexcept { - return get_number_type() == number_type::signed_integer; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { + return iterate(padded_string_view(json, allocated)); } -simdjson_inline int64_t number::get_int64() const noexcept { - return payload.signed_integer; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { + return iterate(pad_with_reserve(json)); } -simdjson_inline number::operator int64_t() const noexcept { - return get_int64(); +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { + return iterate(padded_string_view(json)); } -simdjson_inline bool number::is_double() const noexcept { - return get_number_type() == number_type::floating_point_number; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + padded_string_view json = result.value_unsafe(); + return iterate(json); } -simdjson_inline double number::get_double() const noexcept { - return payload.floating_point_number; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + const padded_string &json = result.value_unsafe(); + return iterate(json); } -simdjson_inline number::operator double() const noexcept { - return get_double(); +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length()) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return json_iterator(reinterpret_cast(json.data()), this); } -simdjson_inline double number::as_double() const noexcept { - if(is_double()) { - return payload.floating_point_number; +inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { + buf += 3; + len -= 3; } - if(is_int64()) { - return double(payload.signed_integer); + if(allow_comma_separated && batch_size < len) { batch_size = len; } + return document_stream(*this, buf, len, batch_size, allow_comma_separated); +} + +inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. + return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(padded_string_view s, size_t batch_size, bool allow_comma_separated) noexcept { + if (!s.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(pad(s), batch_size, allow_comma_separated); +} +simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { + return _capacity; +} +simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; +} +simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { + return _max_depth; +} + +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; } - return double(payload.unsigned_integer); } -simdjson_inline void number::append_s64(int64_t value) noexcept { - payload.signed_integer = value; - type = number_type::signed_integer; +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { + uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; } -simdjson_inline void number::append_u64(uint64_t value) noexcept { - payload.unsigned_integer = value; - type = number_type::unsigned_integer; +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { + uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; } -simdjson_inline void number::append_double(double value) noexcept { - payload.floating_point_number = value; - type = number_type::floating_point_number; +simdjson_inline simdjson_warn_unused ondemand::parser& parser::get_parser() { + return *parser::get_parser_instance(); } -simdjson_inline void number::skip_double() noexcept { - type = number_type::floating_point_number; +simdjson_inline bool release_parser() { + auto &parser_instance = parser::get_threadlocal_parser_if_exists(); + if (parser_instance) { + parser_instance.reset(); + return true; + } + return false; +} + +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_parser_instance() { + std::unique_ptr& parser_instance = get_threadlocal_parser_if_exists(); + if (!parser_instance) { + parser_instance.reset(new ondemand::parser()); + } + return parser_instance; } +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_threadlocal_parser_if_exists() { + // @the-moisrex points out that this could be implemented with std::optional (C++17). + thread_local std::unique_ptr parser_instance = nullptr; + return parser_instance; +} + + } // namespace ondemand } // namespace ppc64 } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::json_type &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H -/* end file simdjson/generic/ondemand/json_type-inl.h for ppc64 */ -/* including simdjson/generic/ondemand/logger-inl.h for ppc64: #include "simdjson/generic/ondemand/logger-inl.h" */ -/* begin file simdjson/generic/ondemand/logger-inl.h for ppc64 */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H +/* end file simdjson/generic/ondemand/parser-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/raw_json_string-inl.h for ppc64: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include -#include - namespace simdjson { + namespace ppc64 { namespace ondemand { -namespace logger { - -static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; -static constexpr const int LOG_EVENT_LEN = 20; -static constexpr const int LOG_BUFFER_LEN = 30; -static constexpr const int LOG_SMALL_BUFFER_LEN = 10; -static int log_depth = 0; // Not threadsafe. Log only. - -// Helper to turn unprintable or newline characters into spaces -static inline char printable_char(char c) { - if (c >= 0x20) { - return c; - } else { - return ' '; - } -} - -template -static inline std::string string_format(const std::string& format, const Args&... args) -{ - SIMDJSON_PUSH_DISABLE_ALL_WARNINGS - int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; - auto size = static_cast(size_s); - if (size <= 0) return std::string(); - std::unique_ptr buf(new char[size]); - std::snprintf(buf.get(), size, format.c_str(), args...); - SIMDJSON_POP_DISABLE_WARNINGS - return std::string(buf.get(), buf.get() + size - 1); -} - -static inline log_level get_log_level_from_env() -{ - SIMDJSON_PUSH_DISABLE_WARNINGS - SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe - char *lvl = getenv("SIMDJSON_LOG_LEVEL"); - SIMDJSON_POP_DISABLE_WARNINGS - if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } - return log_level::info; -} -static inline log_level log_threshold() -{ - static log_level threshold = get_log_level_from_env(); - return threshold; -} - -static inline bool should_log(log_level level) -{ - return level >= log_threshold(); -} +simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} -inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +simdjson_inline const char * raw_json_string::raw() const noexcept { + return reinterpret_cast(buf); } -inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { - log_line(iter, index, depth, "", type, detail, log_level::info); -} -inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +simdjson_inline char raw_json_string::operator[](size_t i) const noexcept { + return reinterpret_cast(buf)[i]; } -inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { - log_line(iter, index, depth, "+", type, detail, log_level::info); - if (LOG_ENABLED) { log_depth++; } -} -inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_line(iter, "+", type, "", delta, depth_delta, log_level::info); - if (LOG_ENABLED) { log_depth++; } +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { + size_t pos{0}; + while(pos < target.size()) { + pos = target.find('"', pos); + if(pos == std::string_view::npos) { return true; } + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } + } + pos++; + } + return true; } -inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - if (LOG_ENABLED) { log_depth--; } - log_line(iter, "-", type, "", delta, depth_delta, log_level::info); +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { + size_t pos{0}; + while(target[pos]) { + const char * result = strchr(target+pos, '"'); + if(result == nullptr) { return true; } + pos = result - target; + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } + } + pos++; + } + return true; } -inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { - log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); -} -inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { - log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); -} -inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_event(iter.json_iter(), type, detail, delta, depth_delta); +simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { + // If we are going to call memcmp, then we must know something about the length of the raw_json_string. + return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); } -inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_value(iter.json_iter(), type, detail, delta, depth_delta); +simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { + // Assumptions: does not contain unescaped quote characters("), and + // the raw content is quote terminated within a valid JSON string. + if(target.size() <= SIMDJSON_PADDING) { + return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + } + const char * r{raw()}; + size_t pos{0}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; } -inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_start_value(iter.json_iter(), type, delta, depth_delta); +simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; } -inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_end_value(iter.json_iter(), type, delta, depth_delta); -} -inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { - log_error(iter.json_iter(), error, detail, delta, depth_delta); +simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { + // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; } -inline void log_headers() noexcept { - if (LOG_ENABLED) { - if (simdjson_unlikely(should_log(log_level::info))) { - // Technically a static variable is not thread-safe, but if you are using threads and logging... well... - static bool displayed_hint{false}; - log_depth = 0; - printf("\n"); - if (!displayed_hint) { - // We only print this helpful header once. - printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); - printf("# +array says 'this is where we were when we discovered the start array'\n"); - printf( - "# -array says 'this is where we were when we ended the array'\n"); - printf("# skip says 'this is a structural or value I am skipping'\n"); - printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); - printf("#\n"); - printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); - printf("# in addition to the depth being displayed.\n"); - printf("#\n"); - printf("# Every token in the document has a single depth determined by the tokens before it,\n"); - printf("# and is not affected by what the token actually is.\n"); - printf("#\n"); - printf("# Not all structural elements are presented as tokens in the logs.\n"); - printf("#\n"); - printf("# We never give control to the user within an empty array or an empty object.\n"); - printf("#\n"); - printf("# Inside an array, having a depth greater than the array's depth means that\n"); - printf("# we are pointing inside a value.\n"); - printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); - printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); - displayed_hint = true; - } - printf("\n"); - printf("| %-*s ", LOG_EVENT_LEN, "Event"); - printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); - printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); - // printf("| %-*s ", 5, "Next#"); - printf("| %-*s ", 5, "Depth"); - printf("| Detail "); - printf("|\n"); - - printf("|%.*s", LOG_EVENT_LEN + 2, DASHES); - printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); - printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); - // printf("|%.*s", 5+2, DASHES); - printf("|%.*s", 5 + 2, DASHES); - printf("|--------"); - printf("|\n"); - fflush(stdout); +simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; } } + if(r[pos] != '"') { return false; } + return true; } -template -inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { - log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); -} - -template -inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept { - if (LOG_ENABLED) { - if (simdjson_unlikely(should_log(level))) { - const int indent = depth * 2; - const auto buf = iter.token.buf; - auto msg = string_format(title, std::forward(args)...); - printf("| %*s%s%-*s ", indent, "", title_prefix, - LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); - { - // Print the current structural. - printf("| "); - // Before we begin, the index might point right before the document. - // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 - if (index < iter._root) { - printf("%*s", LOG_BUFFER_LEN, ""); - } else { - auto current_structural = &buf[*index]; - for (int i = 0; i < LOG_BUFFER_LEN; i++) { - printf("%c", printable_char(current_structural[i])); - } - } - printf(" "); - } - { - // Print the next structural. - printf("| "); - auto next_structural = &buf[*(index + 1)]; - for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { - printf("%c", printable_char(next_structural[i])); - } - printf(" "); - } - // printf("| %5u ", *(index+1)); - printf("| %5i ", depth); - printf("| %6.*s ", int(detail.size()), detail.data()); - printf("|\n"); - fflush(stdout); +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { + return a.unsafe_is_equal(c); +} + +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { + return a == c; +} + +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { + return !(a == c); +} + +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { + return !(a == c); +} + + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { + return iter.unescape(*this, allow_replacement); +} + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { + return iter.unescape_wobbly(*this); +} + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { + bool in_escape = false; + const char *s = str.raw(); + while (true) { + switch (*s) { + case '\\': in_escape = !in_escape; break; + case '"': if (in_escape) { in_escape = false; } else { return out; } break; + default: if (in_escape) { in_escape = false; } } + out << *s; + s++; } } -} // namespace logger } // namespace ondemand } // namespace ppc64 } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H -/* end file simdjson/generic/ondemand/logger-inl.h for ppc64 */ -/* including simdjson/generic/ondemand/object-inl.h for ppc64: #include "simdjson/generic/ondemand/object-inl.h" */ -/* begin file simdjson/generic/ondemand/object-inl.h for ppc64 */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::raw() const noexcept { + if (error()) { return error(); } + return first.raw(); +} +simdjson_inline char simdjson_result::operator[](size_t i) const noexcept { + if (error()) { return error(); } + return first[i]; +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(ppc64::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { + if (error()) { return error(); } + return first.unescape(iter, allow_replacement); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(ppc64::ondemand::json_iterator &iter) const noexcept { + if (error()) { return error(); } + return first.unescape_wobbly(iter); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H +/* end file simdjson/generic/ondemand/raw_json_string-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/serialization-inl.h for ppc64: #include "simdjson/generic/ondemand/serialization-inl.h" */ +/* begin file simdjson/generic/ondemand/serialization-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_builder.h" */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace ppc64 { -namespace ondemand { -simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; - } - return value(iter.child()); -} -simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; - } - return value(iter.child()); -} -simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { - return find_field_unordered(key); +inline std::string_view trim(const std::string_view str) noexcept { + // We can almost surely do better by rolling our own find_first_not_of function. + size_t first = str.find_first_not_of(" \t\n\r"); + // If we have the empty string (just white space), then no trimming is possible, and + // we return the empty string_view. + if (std::string_view::npos == first) { return std::string_view(); } + size_t last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, (last - first + 1)); } -simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { - return std::forward(*this).find_field_unordered(key); + + +inline simdjson_result to_json_string(ppc64::ondemand::document& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; - } - return value(iter.child()); + +inline simdjson_result to_json_string(ppc64::ondemand::document_reference& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; + +inline simdjson_result to_json_string(ppc64::ondemand::value& x) noexcept { + /** + * If we somehow receive a value that has already been consumed, + * then the following code could be in trouble. E.g., we create + * an array as needed, but if an array was already created, then + * it could be bad. + */ + using namespace ppc64::ondemand; + ppc64::ondemand::json_type t; + auto error = x.type().get(t); + if(error != SUCCESS) { return error; } + switch (t) + { + case json_type::array: + { + ppc64::ondemand::array array; + error = x.get_array().get(array); + if(error) { return error; } + return to_json_string(array); + } + case json_type::object: + { + ppc64::ondemand::object object; + error = x.get_object().get(object); + if(error) { return error; } + return to_json_string(object); + } + default: + return trim(x.raw_json_token()); } - return value(iter.child()); } -simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { - SIMDJSON_TRY( iter.start_object().error() ); - return object(iter); +inline simdjson_result to_json_string(ppc64::ondemand::object& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { - SIMDJSON_TRY( iter.start_root_object().error() ); - return object(iter); + +inline simdjson_result to_json_string(ppc64::ondemand::array& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline error_code object::consume() noexcept { - if(iter.is_at_key()) { - /** - * whenever you are pointing at a key, calling skip_child() is - * unsafe because you will hit a string and you will assume that - * it is string value, and this mistake will lead you to make bad - * depth computation. - */ - /** - * We want to 'consume' the key. We could really - * just do _json_iter->return_current_and_advance(); at this - * point, but, for clarity, we will use the high-level API to - * eat the key. We assume that the compiler optimizes away - * most of the work. - */ - simdjson_unused raw_json_string actual_key; - auto error = iter.field_key().get(actual_key); - if (error) { iter.abandon(); return error; }; - // Let us move to the value while we are at it. - if ((error = iter.field_value())) { iter.abandon(); return error; } - } - auto error_skip = iter.json_iter().skip_child(iter.depth()-1); - if(error_skip) { iter.abandon(); } - return error_skip; + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline simdjson_result object::raw_json() noexcept { - const uint8_t * starting_point{iter.peek_start()}; - auto error = consume(); - if(error) { return error; } - const uint8_t * final_point{iter._json_iter->peek()}; - return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { - SIMDJSON_TRY( iter.started_object().error() ); - return object(iter); +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline object object::resume(const value_iterator &iter) noexcept { - return iter; +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline object::object(const value_iterator &_iter) noexcept - : iter{_iter} -{ +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } +} // namespace simdjson -simdjson_inline simdjson_result object::begin() noexcept { -#if SIMDJSON_DEVELOPMENT_CHECKS - if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } -#endif - return object_iterator(iter); +namespace simdjson { namespace ppc64 { namespace ondemand { + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } } -simdjson_inline simdjson_result object::end() noexcept { - return object_iterator(iter); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } } +#endif -inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { - if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } - json_pointer = json_pointer.substr(1); - size_t slash = json_pointer.find('/'); - std::string_view key = json_pointer.substr(0, slash); - // Grab the child with the given key - simdjson_result child; +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif - // If there is an escape character in the key, unescape it and then get the child. - size_t escape = key.find('~'); - if (escape != std::string_view::npos) { - // Unescape the key - std::string unescaped(key); - do { - switch (unescaped[escape+1]) { - case '0': - unescaped.replace(escape, 2, "~"); - break; - case '1': - unescaped.replace(escape, 2, "/"); - break; - default: - return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); - } - escape = unescaped.find('~', escape+1); - } while (escape != std::string::npos); - child = find_field(unescaped); // Take note find_field does not unescape keys when matching +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); } else { - child = find_field(key); + throw simdjson::simdjson_error(error); } - if(child.error()) { - return child; // we do not continue if there was an error +} +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::document_reference& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); } - // If there is a /, we have to recurse and look up more of the path - if (slash != std::string_view::npos) { - child = child.at_pointer(json_pointer.substr(slash)); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); } - return child; } +#endif -inline simdjson_result object::at_path(std::string_view json_path) noexcept { - auto json_pointer = json_path_to_pointer_conversion(json_path); - if (json_pointer == "-1") { - return INVALID_JSON_POINTER; +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); } - return at_pointer(json_pointer); } - -simdjson_inline simdjson_result object::count_fields() & noexcept { - size_t count{0}; - // Important: we do not consume any of the values. - for(simdjson_unused auto v : *this) { count++; } - // The above loop will always succeed, but we want to report errors. - if(iter.error()) { return iter.error(); } - // We need to move back at the start because we expect users to iterate through - // the object after counting the number of elements. - iter.reset_object(); - return count; +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); } - -simdjson_inline simdjson_result object::is_empty() & noexcept { - bool is_not_empty; - auto error = iter.reset_object().get(is_not_empty); - if(error) { return error; } - return !is_not_empty; +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } } +#endif +}}} // namespace simdjson::ppc64::ondemand -simdjson_inline simdjson_result object::reset() & noexcept { - return iter.reset_object(); -} +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +/* end file simdjson/generic/ondemand/serialization-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/token_iterator-inl.h for ppc64: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/token_iterator-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H -} // namespace ondemand -} // namespace ppc64 -} // namespace simdjson +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { +namespace ppc64 { +namespace ondemand { -simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::object &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline token_iterator::token_iterator( + const uint8_t *_buf, + token_position position +) noexcept : buf{_buf}, _position{position} +{ +} -simdjson_inline simdjson_result simdjson_result::begin() noexcept { - if (error()) { return error(); } - return first.begin(); +simdjson_inline uint32_t token_iterator::current_offset() const noexcept { + return *(_position); } -simdjson_inline simdjson_result simdjson_result::end() noexcept { - if (error()) { return error(); } - return first.end(); + + +simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { + return &buf[*(_position++)]; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); + +simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { + return &buf[*position]; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first).find_field_unordered(key); +simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { + return *position; } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { - if (error()) { return error(); } - return first[key]; +simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { + return *(position+1) - *position; } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first)[key]; + +simdjson_inline uint32_t token_iterator::peek_root_length(token_position position) const noexcept { + return *(position+2) - *(position) > *(position+1) - *(position) ? + *(position+1) - *(position) + : *(position+2) - *(position); } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); +simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { + return &buf[*(_position+delta)]; } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first).find_field(key); +simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { + return *(_position+delta); +} +simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { + return *(_position+delta+1) - *(_position+delta); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); +simdjson_inline token_position token_iterator::position() const noexcept { + return _position; +} +simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { + _position = target_position; } -simdjson_inline simdjson_result simdjson_result::at_path( - std::string_view json_path) noexcept { - if (error()) { - return error(); - } - return first.at_path(json_path); +simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { + return _position == other._position; +} +simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { + return _position != other._position; +} +simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { + return _position > other._position; +} +simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { + return _position >= other._position; } - -inline simdjson_result simdjson_result::reset() noexcept { - if (error()) { return error(); } - return first.reset(); +simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { + return _position < other._position; } - -inline simdjson_result simdjson_result::is_empty() noexcept { - if (error()) { return error(); } - return first.is_empty(); +simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { + return _position <= other._position; } -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { - if (error()) { return error(); } - return first.count_fields(); -} +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} -simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { - if (error()) { return error(); } - return first.raw_json(); -} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H -/* end file simdjson/generic/ondemand/object-inl.h for ppc64 */ -/* including simdjson/generic/ondemand/object_iterator-inl.h for ppc64: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/object_iterator-inl.h for ppc64 */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/token_iterator-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/value_iterator-inl.h for ppc64: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/value_iterator-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace ppc64 { namespace ondemand { -// -// object_iterator -// - -simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept - : iter{_iter} -{} +simdjson_inline value_iterator::value_iterator( + json_iterator *json_iter, + depth_t depth, + token_position start_position +) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} +{ +} -simdjson_inline simdjson_result object_iterator::operator*() noexcept { - error_code error = iter.error(); - if (error) { iter.abandon(); return error; } - auto result = field::start(iter); - // TODO this is a safety rail ... users should exit loops as soon as they receive an error. - // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. - if (result.error()) { iter.abandon(); } - return result; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_object(); } -simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { - return !(*this != other); + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_root_object(); } -simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { - return iter.is_open(); + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { + assert_at_container_start(); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + if (*_json_iter->peek() == '}') { + logger::log_value(*_json_iter, "empty object"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY(end_container()); + return false; + } + return true; } -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_inline object_iterator &object_iterator::operator++() noexcept { - // TODO this is a safety rail ... users should exit loops as soon as they receive an error. - // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. - if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // object: e.g., `{"a":2} foo }`. Users concerned with garbage content should + // call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != '}') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); + } + // If the last character is } *and* the first gibberish character is also '}' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed object. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} - simdjson_unused error_code error; - if ((error = iter.skip_child() )) { return *this; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { + auto error = check_root_object(); + if(error) { return error; } + return started_object(); +} - simdjson_unused bool has_value; - if ((error = iter.has_next_field().get(has_value) )) { return *this; }; - return *this; +simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { +#if SIMDJSON_CHECK_EOF + if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } + // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + _json_iter->ascend_to(depth()-1); + return SUCCESS; } -SIMDJSON_POP_DISABLE_WARNINGS -// -// ### Live States -// -// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is -// always SUCCESS: -// -// - Start: This is the state when the object is first found and the iterator is just past the {. -// In this state, at_start == true. -// - Next: After we hand a scalar value to the user, or an array/object which they then fully -// iterate over, the iterator is at the , or } before the next value. In this state, -// depth == iter.depth, at_start == false, and error == SUCCESS. -// - Unfinished Business: When we hand an array/object to the user which they do not fully -// iterate over, we need to finish that iteration by skipping child values until we reach the -// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. -// -// ## Error States -// -// In error states, we will yield exactly one more value before stopping. iter.depth == depth -// and at_start is always false. We decrement after yielding the error, moving to the Finished -// state. -// -// - Chained Error: When the object iterator is part of an error chain--for example, in -// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an -// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and -// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. -// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, -// we flag that as an error and treat it exactly the same as a Chained Error. In this state, -// error == TAPE_ERROR, iter.depth == depth, and at_start == false. -// -// Errors that occur while reading a field to give to the user (such as when the key is not a -// string or the field is missing a colon) are yielded immediately. Depth is then decremented, -// moving to the Finished state without transitioning through an Error state at all. -// -// ## Terminal State -// -// The terminal state has iter.depth < depth. at_start is always false. -// -// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. -// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. -// +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { + assert_at_next(); -} // namespace ondemand -} // namespace ppc64 -} // namespace simdjson + // It's illegal to call this unless there are more tokens: anything that ends in } or ] is + // obligated to verify there are more tokens if they are not the top level. + switch (*_json_iter->return_current_and_advance()) { + case '}': + logger::log_end_value(*_json_iter, "object"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between object fields"); + } +} -namespace simdjson { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { + error_code error; + bool has_value; + // + // Initially, the object can be in one of a few different places: + // + // 1. The start of the object, at the first field: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + if (at_first_field()) { + has_value = true; -simdjson_inline simdjson_result::simdjson_result( - ppc64::ondemand::object_iterator &&value -) noexcept - : implementation_simdjson_result_base(std::forward(value)) -{ - first.iter.assert_is_valid(); -} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base({}, error) -{ -} + // + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + return false; -simdjson_inline simdjson_result simdjson_result::operator*() noexcept { - if (error()) { return error(); } - return *first; -} -// If we're iterating and there is an error, return the error once. -simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return !error(); } - return first == other.first; -} -// If we're iterating and there is an error, return the error once. -simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return error(); } - return first != other.first; -} -// Checks for ']' and ',' -simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { - // Clear the error if there is one, so we don't yield it twice - if (error()) { second = SUCCESS; return *this; } - ++first; - return *this; -} + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + if ((error = skip_child() )) { abandon(); return error; } + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + while (has_value) { + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + //if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } -} // namespace simdjson + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); // Skip the value entirely + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } -#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/object_iterator-inl.h for ppc64 */ -/* including simdjson/generic/ondemand/parser-inl.h for ppc64: #include "simdjson/generic/ondemand/parser-inl.h" */ -/* begin file simdjson/generic/ondemand/parser-inl.h for ppc64 */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H + // If the loop ended, we're out of fields to look at. + return false; +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { + /** + * When find_field_unordered_raw is called, we can either be pointing at the + * first key, pointing outside (at the closing brace) or if a key was matched + * we can be either pointing right afterthe ':' right before the value (that we need skip), + * or we may have consumed the value and we might be at a comma or at the + * final brace (ready for a call to has_next_field()). + */ + error_code error; + bool has_value; -namespace simdjson { -namespace ppc64 { -namespace ondemand { + // First, we scan from that point to the end. + // If we don't find a match, we may loop back around, and scan from the beginning to that point. + token_position search_start = _json_iter->position(); -simdjson_inline parser::parser(size_t max_capacity) noexcept - : _max_capacity{max_capacity} { -} + // We want to know whether we need to go back to the beginning. + bool at_first = at_first_field(); + /////////////// + // Initially, the object can be in one of a few different places: + // + // 1. At the first key: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + // + if (at_first) { + has_value = true; -simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { - if (new_capacity > max_capacity()) { return CAPACITY; } - if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { - // string_capacity copied from document::allocate - _capacity = 0; - size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); - string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); #if SIMDJSON_DEVELOPMENT_CHECKS - start_positions.reset(new (std::nothrow) token_position[new_max_depth]); + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } #endif - if (implementation) { - SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); - SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + SIMDJSON_TRY(reset_object().get(has_value)); + at_first = true; + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // } else { - SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); - } - _capacity = new_capacity; - _max_depth = new_max_depth; - return SUCCESS; -} + // If someone queried a key but they not did access the value, then we are left pointing + // at the ':' and we need to move forward through the value... If the value was + // processed then skip_child() does not move the iterator (but may adjust the depth). + if ((error = skip_child() )) { abandon(); return error; } + search_start = _json_iter->position(); + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } #if SIMDJSON_DEVELOPMENT_CHECKS -simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { - return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); -} + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } #endif + } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + // After initial processing, we will be in one of two states: + // + // ``` + // // At the beginning of a field + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // At the end of the object + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // ``` + // + // Next, we find a match starting from the current position. + while (has_value) { + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + // Performance note: it maybe wasteful to rewind to the beginning when there might be + // no other query following. Indeed, it would require reskipping the whole object. + // Instead, you can just stay where you are. If there is a new query, there is always time + // to rewind. + if(at_first) { return false; } + + // If we reach the end without finding a match, search the rest of the fields starting at the + // beginning of the object. + // (We have already run through the object before, so we've already validated its structure. We + // don't check errors in this bit.) + SIMDJSON_TRY(reset_object().get(has_value)); + while (true) { + SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + error = field_value(); SIMDJSON_ASSUME(!error); - json.remove_utf8_bom(); + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } - // Allocate if needed - if (capacity() < json.length() || !string_buf) { - SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // If we reached the end of the key-value pair we started from, then we know + // that the key is not there so we return false. We are either right before + // the next comma or the final brace. + if(_json_iter->position() == search_start) { return false; } + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); + // If we make the mistake of exiting here, then we could be left pointing at a key + // in the middle of an object. That's not an allowable state. } - - // Run stage 1. - SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); - return document::start({ reinterpret_cast(json.data()), this }); + // If the loop ended, we're out of fields to look at. The program should + // never reach this point. + return false; } +SIMDJSON_POP_DISABLE_WARNINGS -#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { + assert_at_next(); - json.remove_utf8_bom(); + const uint8_t *key = _json_iter->return_current_and_advance(); + if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } + return raw_json_string(key); +} - // Allocate if needed - if (capacity() < json.length() || !string_buf) { - SIMDJSON_TRY( allocate(json.length(), max_depth()) ); - } +simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { + assert_at_next(); - // Run stage 1. - const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); - if (err) { - if (err != UNCLOSED_STRING) - return err; - } - return document::start({ reinterpret_cast(json.data()), this, true }); + if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } + _json_iter->descend_to(depth()+1); + return SUCCESS; } -#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { - return iterate(padded_string_view(json, len, allocated)); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_array(); } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { - return iterate(padded_string_view(json, len, allocated)); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_root_array(); } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { - return iterate(padded_string_view(json, allocated)); +inline std::string value_iterator::to_string() const noexcept { + auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); + if(_json_iter != nullptr) { answer += _json_iter->to_string(); } + answer += std::string(" ]"); + return answer; } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { - if(json.capacity() - json.size() < SIMDJSON_PADDING) { - json.reserve(json.size() + SIMDJSON_PADDING); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { + assert_at_container_start(); + if (*_json_iter->peek() == ']') { + logger::log_value(*_json_iter, "empty array"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY( end_container() ); + return false; } - return iterate(padded_string_view(json)); -} - -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { - return iterate(padded_string_view(json)); + _json_iter->descend_to(depth()+1); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + return true; } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { - // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception - SIMDJSON_TRY( result.error() ); - padded_string_view json = result.value_unsafe(); - return iterate(json); +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should + // also call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != ']') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); + } + // If the last character is ] *and* the first gibberish character is also ']' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed array. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { - // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception - SIMDJSON_TRY( result.error() ); - const padded_string &json = result.value_unsafe(); - return iterate(json); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { + auto error = check_root_array(); + if (error) { return error; } + return started_array(); } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } - - json.remove_utf8_bom(); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { + assert_at_next(); - // Allocate if needed - if (capacity() < json.length()) { - SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + logger::log_event(*this, "has_next_element"); + switch (*_json_iter->return_current_and_advance()) { + case ']': + logger::log_end_value(*_json_iter, "array"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + _json_iter->descend_to(depth()+1); + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between array elements"); } +} - // Run stage 1. - SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); - return json_iterator(reinterpret_cast(json.data()), this); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { + auto not_true = atomparsing::str4ncmp(json, "true"); + auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); + bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 5 : 4]); + if (error) { return incorrect_type_error("Not a boolean"); } + return simdjson_result(!not_true); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { + bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); + // if we start with 'n', we must be a null + if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } + return is_null_string; } -inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { - if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } - if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { - buf += 3; - len -= 3; - } - if(allow_comma_separated && batch_size < len) { batch_size = len; } - return document_stream(*this, buf, len, batch_size, allow_comma_separated); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { + return get_raw_json_string().unescape(json_iter(), allow_replacement); } -inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_string(allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; } -inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { + return get_raw_json_string().unescape_wobbly(json_iter()); } -inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + advance_scalar("string"); + return raw_json_string(json+1); } - -simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { - return _capacity; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { + auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; } -simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept { - return _max_capacity; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { + auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; } -simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { - return _max_depth; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { + auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; } - -simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { - if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { - _max_capacity = max_capacity; - } else { - _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; - } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { + auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; } - -simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { - uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); - if (!end) { return STRING_ERROR; } - std::string_view result(reinterpret_cast(dst), end-dst); - dst = end; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { + auto result = numberparsing::parse_double(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } return result; } - -simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { - uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); - if (!end) { return STRING_ERROR; } - std::string_view result(reinterpret_cast(dst), end-dst); - dst = end; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { + auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } return result; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { + auto result = parse_bool(peek_non_root_scalar("bool")); + if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } + return result; +} +simdjson_inline simdjson_result value_iterator::is_null() noexcept { + bool is_null_value; + SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); + if(is_null_value) { advance_non_root_scalar("null"); } + return is_null_value; +} +simdjson_inline bool value_iterator::is_negative() noexcept { + return numberparsing::is_negative(peek_non_root_scalar("numbersign")); +} +simdjson_inline bool value_iterator::is_root_negative() noexcept { + return numberparsing::is_negative(peek_root_scalar("numbersign")); +} +simdjson_inline simdjson_result value_iterator::is_integer() noexcept { + return numberparsing::is_integer(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { + return numberparsing::get_number_type(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number() noexcept { + number num; + error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); + if(error) { return error; } + return num; +} -} // namespace ondemand -} // namespace ppc64 -} // namespace simdjson - -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::parser &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H -/* end file simdjson/generic/ondemand/parser-inl.h for ppc64 */ -/* including simdjson/generic/ondemand/raw_json_string-inl.h for ppc64: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ -/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for ppc64 */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { - -namespace ppc64 { -namespace ondemand { - -simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} - -simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } - +simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("is_root_integer"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + return false; // if there are more than 20 characters, it cannot be represented as an integer. + } + auto answer = numberparsing::is_integer(tmpbuf); + // If the parsing was a success, we must still check that it is + // a single scalar. Note that we parse first because of cases like '[]' where + // getting TRAILING_CONTENT is wrong. + if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } + return answer; +} -simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { - size_t pos{0}; - while(pos < target.size()) { - pos = target.find('"', pos); - if(pos == std::string_view::npos) { return true; } - if(pos != 0 && target[pos-1] != '\\') { return false; } - if(pos > 1 && target[pos-2] == '\\') { - size_t backslash_count{2}; - for(size_t i = 3; i <= pos; i++) { - if(target[pos-i] == '\\') { backslash_count++; } - else { break; } - } - if(backslash_count % 2 == 0) { return false; } +simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return number_type::big_integer; } - pos++; + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; } - return true; + auto answer = numberparsing::get_number_type(tmpbuf); + if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + return answer; } - -simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { - size_t pos{0}; - while(target[pos]) { - const char * result = strchr(target+pos, '"'); - if(result == nullptr) { return true; } - pos = result - target; - if(pos != 0 && target[pos-1] != '\\') { return false; } - if(pos > 1 && target[pos-2] == '\\') { - size_t backslash_count{2}; - for(size_t i = 3; i <= pos; i++) { - if(target[pos-i] == '\\') { backslash_count++; } - else { break; } - } - if(backslash_count % 2 == 0) { return false; } +simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + // NOTE: the current approach doesn't work for very big integer numbers containing more than 1074 digits. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return BIGINT_ERROR; } - pos++; + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; } - return true; + number num; + error_code error = numberparsing::parse_number(tmpbuf, num); + if(error) { return error; } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("number"); + return num; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { + return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_root_string(check_trailing, allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { + return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } - -simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { - // If we are going to call memcmp, then we must know something about the length of the raw_json_string. - return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + auto result = numberparsing::parse_integer(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } -simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { - // Assumptions: does not contain unescaped quote characters("), and - // the raw content is quote terminated within a valid JSON string. - if(target.size() <= SIMDJSON_PADDING) { - return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + auto result = numberparsing::parse_integer_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; } - const char * r{raw()}; - size_t pos{0}; - for(;pos < target.size();pos++) { - if(r[pos] != target[pos]) { return false; } + auto result = numberparsing::parse_double(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); } - if(r[pos] != '"') { return false; } - return true; + return result; } -simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { - const char * r{raw()}; - size_t pos{0}; - bool escaping{false}; - for(;pos < target.size();pos++) { - if(r[pos] != target[pos]) { return false; } - // if target is a compile-time constant and it is free from - // quotes, then the next part could get optimized away through - // inlining. - if((target[pos] == '"') && !escaping) { - // We have reached the end of the raw_json_string but - // the target is not done. - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; - } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; } - if(r[pos] != '"') { return false; } - return true; + auto result = numberparsing::parse_double_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; } - -simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { - // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and - // the raw content is quote terminated within a valid JSON string. - const char * r{raw()}; - size_t pos{0}; - for(;target[pos];pos++) { - if(r[pos] != target[pos]) { return false; } - } - if(r[pos] != '"') { return false; } - return true; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("bool"); + // We have a boolean if we have either "true" or "false" and the next character is either + // a structural character or whitespace. We also check that the length is correct: + // "true" and "false" are 4 and 5 characters long, respectively. + bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && + (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + return value_true; } -simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { - // Assumptions: does not contain unescaped quote characters, and - // the raw content is quote terminated within a valid JSON string. - const char * r{raw()}; - size_t pos{0}; - bool escaping{false}; - for(;target[pos];pos++) { - if(r[pos] != target[pos]) { return false; } - // if target is a compile-time constant and it is free from - // quotes, then the next part could get optimized away through - // inlining. - if((target[pos] == '"') && !escaping) { - // We have reached the end of the raw_json_string but - // the target is not done. - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; - } +simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("null"); + bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + if(result) { // we have something that looks like a null. + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("null"); + } else if (json[0] == 'n') { + return incorrect_type_error("Not a null but starts with n"); } - if(r[pos] != '"') { return false; } - return true; + return result; } -simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { - return a.unsafe_is_equal(c); -} +simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); -simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { - return a == c; + return _json_iter->skip_child(depth()); } -simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { - return !(a == c); +simdjson_inline value_iterator value_iterator::child() const noexcept { + assert_at_child(); + return { _json_iter, depth()+1, _json_iter->token.position() }; } -simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { - return !(a == c); +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is +// marked non-inline. +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline bool value_iterator::is_open() const noexcept { + return _json_iter->depth() >= depth(); } +SIMDJSON_POP_DISABLE_WARNINGS - -simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { - return iter.unescape(*this, allow_replacement); +simdjson_inline bool value_iterator::at_end() const noexcept { + return _json_iter->at_end(); } -simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { - return iter.unescape_wobbly(*this); +simdjson_inline bool value_iterator::at_start() const noexcept { + return _json_iter->token.position() == start_position(); } -simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { - bool in_escape = false; - const char *s = str.raw(); - while (true) { - switch (*s) { - case '\\': in_escape = !in_escape; break; - case '"': if (in_escape) { in_escape = false; } else { return out; } break; - default: if (in_escape) { in_escape = false; } - } - out << *s; - s++; - } +simdjson_inline bool value_iterator::at_first_field() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + return _json_iter->token.position() == start_position() + 1; } -} // namespace ondemand -} // namespace ppc64 -} // namespace simdjson - -namespace simdjson { +simdjson_inline void value_iterator::abandon() noexcept { + _json_iter->abandon(); +} -simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::raw_json_string &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { + return _depth; +} +simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { + return _json_iter->error; +} +simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { + return _json_iter->string_buf_loc(); +} +simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { + return *_json_iter; +} +simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { + return *_json_iter; +} -simdjson_inline simdjson_result simdjson_result::raw() const noexcept { - if (error()) { return error(); } - return first.raw(); +simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { + return _json_iter->peek(start_position()); } -simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(ppc64::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { - if (error()) { return error(); } - return first.unescape(iter, allow_replacement); +simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { + return _json_iter->peek_length(start_position()); } -simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(ppc64::ondemand::json_iterator &iter) const noexcept { - if (error()) { return error(); } - return first.unescape_wobbly(iter); +simdjson_inline uint32_t value_iterator::peek_root_length() const noexcept { + return _json_iter->peek_root_length(start_position()); } -} // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H -/* end file simdjson/generic/ondemand/raw_json_string-inl.h for ppc64 */ -/* including simdjson/generic/ondemand/serialization-inl.h for ppc64: #include "simdjson/generic/ondemand/serialization-inl.h" */ -/* begin file simdjson/generic/ondemand/serialization-inl.h for ppc64 */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return peek_start(); } -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + return _json_iter->peek(); +} -namespace simdjson { +simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return; } -inline std::string_view trim(const std::string_view str) noexcept { - // We can almost surely do better by rolling our own find_first_not_of function. - size_t first = str.find_first_not_of(" \t\n\r"); - // If we have the empty string (just white space), then no trimming is possible, and - // we return the empty string_view. - if (std::string_view::npos == first) { return std::string_view(); } - size_t last = str.find_last_not_of(" \t\n\r"); - return str.substr(first, (last - first + 1)); + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); } +simdjson_warn_unused simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { + logger::log_start_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + const uint8_t *json; + if (!is_at_start()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + json = peek_start(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + } else { + assert_at_start(); + /** + * We should be prudent. Let us peek. If it is not the right type, we + * return an error. Only once we have determined that we have the right + * type are we allowed to advance! + */ + json = _json_iter->peek(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + _json_iter->return_current_and_advance(); + } -inline simdjson_result to_json_string(ppc64::ondemand::document& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); -} -inline simdjson_result to_json_string(ppc64::ondemand::document_reference& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); + return SUCCESS; } -inline simdjson_result to_json_string(ppc64::ondemand::value& x) noexcept { - /** - * If we somehow receive a value that has already been consumed, - * then the following code could be in trouble. E.g., we create - * an array as needed, but if an array was already created, then - * it could be bad. - */ - using namespace ppc64::ondemand; - ppc64::ondemand::json_type t; - auto error = x.type().get(t); - if(error != SUCCESS) { return error; } - switch (t) - { - case json_type::array: - { - ppc64::ondemand::array array; - error = x.get_array().get(array); - if(error) { return error; } - return to_json_string(array); - } - case json_type::object: - { - ppc64::ondemand::object object; - error = x.get_object().get(object); - if(error) { return error; } - return to_json_string(object); - } - default: - return trim(x.raw_json_token()); - } -} -inline simdjson_result to_json_string(ppc64::ondemand::object& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); -} +simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } -inline simdjson_result to_json_string(ppc64::ondemand::array& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); + assert_at_root(); + return _json_iter->peek(); } +simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); + assert_at_non_root_start(); + return _json_iter->peek(); } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); -} +simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); + assert_at_root(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); } +simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); + assert_at_non_root_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); +simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { + logger::log_error(*_json_iter, start_position(), depth(), message); + return INCORRECT_TYPE; } -} // namespace simdjson -namespace simdjson { namespace ppc64 { namespace ondemand { - -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::value x) { - std::string_view v; - auto error = simdjson::to_json_string(x).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::value x) { - std::string_view v; - auto error = simdjson::to_json_string(x).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } +simdjson_inline bool value_iterator::is_at_start() const noexcept { + return position() == start_position(); } -#endif -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::array value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::array value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } +simdjson_inline bool value_iterator::is_at_key() const noexcept { + // Keys are at the same depth as the object. + // Note here that we could be safer and check that we are within an object, + // but we do not. + return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; } -#endif -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::document& value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::document_reference& value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::document& value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } +simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { + // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). + auto delta = position() - start_position(); + return delta == 1 || delta == 2; } -#endif -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::object value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::object value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } +inline void value_iterator::assert_at_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); } -#endif -}}} // namespace simdjson::ppc64::ondemand - -#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H -/* end file simdjson/generic/ondemand/serialization-inl.h for ppc64 */ -/* including simdjson/generic/ondemand/token_iterator-inl.h for ppc64: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/token_iterator-inl.h for ppc64 */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { -namespace ppc64 { -namespace ondemand { -simdjson_inline token_iterator::token_iterator( - const uint8_t *_buf, - token_position position -) noexcept : buf{_buf}, _position{position} -{ +inline void value_iterator::assert_at_container_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); } -simdjson_inline uint32_t token_iterator::current_offset() const noexcept { - return *(_position); +inline void value_iterator::assert_at_next() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); } - -simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { - return &buf[*(_position++)]; +simdjson_inline void value_iterator::move_at_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position); } -simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { - return &buf[*position]; -} -simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { - return *position; +simdjson_inline void value_iterator::move_at_container_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position + 1); } -simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { - return *(position+1) - *position; + +simdjson_inline simdjson_result value_iterator::reset_array() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_array(); } -simdjson_inline uint32_t token_iterator::peek_root_length(token_position position) const noexcept { - return *(position+2) - *(position) > *(position+1) - *(position) ? - *(position+1) - *(position) - : *(position+2) - *(position); +simdjson_inline simdjson_result value_iterator::reset_object() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_object(); } -simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { - return &buf[*(_position+delta)]; + +inline void value_iterator::assert_at_child() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); + SIMDJSON_ASSUME( _depth > 0 ); } -simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { - return *(_position+delta); + +inline void value_iterator::assert_at_root() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth == 1 ); } -simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { - return *(_position+delta+1) - *(_position+delta); + +inline void value_iterator::assert_at_non_root_start() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth > 1 ); } -simdjson_inline token_position token_iterator::position() const noexcept { - return _position; +inline void value_iterator::assert_is_valid() const noexcept { + SIMDJSON_ASSUME( _json_iter != nullptr ); } -simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { - _position = target_position; + +simdjson_inline bool value_iterator::is_valid() const noexcept { + return _json_iter != nullptr; } -simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { - return _position == other._position; +simdjson_inline simdjson_result value_iterator::type() const noexcept { + switch (*peek_start()) { + case '{': + return json_type::object; + case '[': + return json_type::array; + case '"': + return json_type::string; + case 'n': + return json_type::null; + case 't': case 'f': + return json_type::boolean; + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return json_type::number; + default: + return json_type::unknown; + } } -simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { - return _position != other._position; + +simdjson_inline token_position value_iterator::start_position() const noexcept { + return _start_position; } -simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { - return _position > other._position; + +simdjson_inline token_position value_iterator::position() const noexcept { + return _json_iter->position(); } -simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { - return _position >= other._position; + +simdjson_inline token_position value_iterator::end_position() const noexcept { + return _json_iter->end_position(); } -simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { - return _position < other._position; + +simdjson_inline token_position value_iterator::last_position() const noexcept { + return _json_iter->last_position(); } -simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { - return _position <= other._position; + +simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { + return _json_iter->report_error(error, message); } } // namespace ondemand @@ -87351,1116 +105928,2050 @@ simdjson_inline bool token_iterator::operator<=(const token_iterator &other) con namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::token_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::value_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/token_iterator-inl.h for ppc64 */ -/* including simdjson/generic/ondemand/value_iterator-inl.h for ppc64: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/value_iterator-inl.h for ppc64 */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/value_iterator-inl.h for ppc64 */ + +// JSON builder inline definitions +/* including simdjson/generic/ondemand/json_string_builder-inl.h for ppc64: #include "simdjson/generic/ondemand/json_string_builder-inl.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder-inl.h for ppc64 */ +/** + * This file is part of the builder API. It is temporarily in the ondemand + * directory but we will move it to a builder directory later. + */ +#include +#include +#include +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +/* + * Empirically, we have found that an inlined optimization is important for + * performance. The following macros are not ideal. We should find a better + * way to inline the code. + */ + +#if defined(__SSE2__) || defined(__x86_64__) || defined(__x86_64) || \ + (defined(_M_AMD64) || defined(_M_X64) || \ + (defined(_M_IX86_FP) && _M_IX86_FP == 2)) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#define SIMDJSON_EXPERIMENTAL_HAS_SSE2 1 +#endif +#endif + +#if defined(__aarch64__) || defined(_M_ARM64) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_NEON +#define SIMDJSON_EXPERIMENTAL_HAS_NEON 1 +#endif +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +#include +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#include +#endif + namespace simdjson { namespace ppc64 { -namespace ondemand { +namespace builder { + +static SIMDJSON_CONSTEXPR_LAMBDA std::array + json_quotable_character = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; -simdjson_inline value_iterator::value_iterator( - json_iterator *json_iter, - depth_t depth, - token_position start_position -) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} -{ -} +/** -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { - SIMDJSON_TRY( start_container('{', "Not an object", "object") ); - return started_object(); -} +A possible SWAR implementation of has_json_escapable_byte. It is not used +because it is slower than the current implementation. It is kept here for +reference (to show that we tried it). -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { - SIMDJSON_TRY( start_container('{', "Not an object", "object") ); - return started_root_object(); +inline bool has_json_escapable_byte(uint64_t x) { + uint64_t is_ascii = 0x8080808080808080ULL & ~x; + uint64_t xor2 = x ^ 0x0202020202020202ULL; + uint64_t lt32_or_eq34 = xor2 - 0x2121212121212121ULL; + uint64_t sub92 = x ^ 0x5C5C5C5C5C5C5C5CULL; + uint64_t eq92 = (sub92 - 0x0101010101010101ULL); + return ((lt32_or_eq34 | eq92) & is_ascii) != 0; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { - assert_at_container_start(); -#if SIMDJSON_DEVELOPMENT_CHECKS - _json_iter->set_start_position(_depth, start_position()); -#endif - if (*_json_iter->peek() == '}') { - logger::log_value(*_json_iter, "empty object"); - _json_iter->return_current_and_advance(); - end_container(); - return false; - } - return true; -} +**/ -simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { - // When in streaming mode, we cannot expect peek_last() to be the last structural element of the - // current document. It only works in the normal mode where we have indexed a single document. - // Note that adding a check for 'streaming' is not expensive since we only have at most - // one root element. - if ( ! _json_iter->streaming() ) { - // The following lines do not fully protect against garbage content within the - // object: e.g., `{"a":2} foo }`. Users concerned with garbage content should - // call `at_end()` on the document instance at the end of the processing to - // ensure that the processing has finished at the end. - // - if (*_json_iter->peek_last() != '}') { - _json_iter->abandon(); - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); - } - // If the last character is } *and* the first gibberish character is also '}' - // then on-demand could accidentally go over. So we need additional checks. - // https://github.com/simdjson/simdjson/issues/1834 - // Checking that the document is balanced requires a full scan which is potentially - // expensive, but it only happens in edge cases where the first padding character is - // a closing bracket. - if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { - _json_iter->abandon(); - // The exact error would require more work. It will typically be an unclosed object. - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); +SIMDJSON_CONSTEXPR_LAMBDA simdjson_inline bool +simple_needs_escaping(std::string_view v) { + for (char c : v) { + // a table lookup is faster than a series of comparisons + if (json_quotable_character[static_cast(c)]) { + return true; } } - return SUCCESS; + return false; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { - auto error = check_root_object(); - if(error) { return error; } - return started_object(); +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); + } + size_t i = 0; + uint8x16_t running = vdupq_n_u8(0); + uint8x16_t v34 = vdupq_n_u8(34); + uint8x16_t v92 = vdupq_n_u8(92); + + for (; i + 15 < view.size(); i += 16) { + uint8x16_t word = vld1q_u8((const uint8_t *)view.data() + i); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + if (i < view.size()) { + uint8x16_t word = + vld1q_u8((const uint8_t *)view.data() + view.length() - 16); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + return vmaxvq_u32(vreinterpretq_u32_u8(running)) != 0; +} +#elif SIMDJSON_EXPERIMENTAL_HAS_SSE2 +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); + } + size_t i = 0; + __m128i running = _mm_setzero_si128(); + for (; i + 15 < view.size(); i += 16) { + + __m128i word = + _mm_loadu_si128(reinterpret_cast(view.data() + i)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + if (i < view.size()) { + __m128i word = _mm_loadu_si128( + reinterpret_cast(view.data() + view.length() - 16)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + return _mm_movemask_epi8(running) != 0; +} +#else +simdjson_inline bool fast_needs_escaping(std::string_view view) { + return simple_needs_escaping(view); } +#endif -simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { -#if SIMDJSON_CHECK_EOF - if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } - // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } -#endif // SIMDJSON_CHECK_EOF - _json_iter->ascend_to(depth()-1); - return SUCCESS; +SIMDJSON_CONSTEXPR_LAMBDA inline size_t +find_next_json_quotable_character(const std::string_view view, + size_t location) noexcept { + + for (auto pos = view.begin() + location; pos != view.end(); ++pos) { + if (json_quotable_character[static_cast(*pos)]) { + return pos - view.begin(); + } + } + return size_t(view.size()); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { - assert_at_next(); +SIMDJSON_CONSTEXPR_LAMBDA static std::string_view control_chars[] = { + "\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", "\\u0006", + "\\u0007", "\\b", "\\t", "\\n", "\\u000b", "\\f", "\\r", + "\\u000e", "\\u000f", "\\u0010", "\\u0011", "\\u0012", "\\u0013", "\\u0014", + "\\u0015", "\\u0016", "\\u0017", "\\u0018", "\\u0019", "\\u001a", "\\u001b", + "\\u001c", "\\u001d", "\\u001e", "\\u001f"}; - // It's illegal to call this unless there are more tokens: anything that ends in } or ] is - // obligated to verify there are more tokens if they are not the top level. - switch (*_json_iter->return_current_and_advance()) { - case '}': - logger::log_end_value(*_json_iter, "object"); - SIMDJSON_TRY( end_container() ); - return false; - case ',': - return true; - default: - return report_error(TAPE_ERROR, "Missing comma between object fields"); +// All Unicode characters may be placed within the quotation marks, except for +// the characters that MUST be escaped: quotation mark, reverse solidus, and the +// control characters (U+0000 through U+001F). There are two-character sequence +// escape representations of some popular characters: +// \", \\, \b, \f, \n, \r, \t. +SIMDJSON_CONSTEXPR_LAMBDA void escape_json_char(char c, char *&out) { + if (c == '"') { + memcpy(out, "\\\"", 2); + out += 2; + } else if (c == '\\') { + memcpy(out, "\\\\", 2); + out += 2; + } else { + std::string_view v = control_chars[uint8_t(c)]; + memcpy(out, v.data(), v.size()); + out += v.size(); + } +} + +inline size_t write_string_escaped(const std::string_view input, char *out) { + size_t mysize = input.size(); + if (!fast_needs_escaping(input)) { // fast path! + memcpy(out, input.data(), input.size()); + return input.size(); + } + const char *const initout = out; + size_t location = find_next_json_quotable_character(input, 0); + memcpy(out, input.data(), location); + out += location; + escape_json_char(input[location], out); + location += 1; + while (location < mysize) { + size_t newlocation = find_next_json_quotable_character(input, location); + memcpy(out, input.data() + location, newlocation - location); + out += newlocation - location; + location = newlocation; + if (location == mysize) { + break; + } + escape_json_char(input[location], out); + location += 1; } + return out - initout; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { - error_code error; - bool has_value; - // - // Initially, the object can be in one of a few different places: - // - // 1. The start of the object, at the first field: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2, index 1) - // ``` - if (at_first_field()) { - has_value = true; +simdjson_inline string_builder::string_builder(size_t initial_capacity) + : buffer(new(std::nothrow) char[initial_capacity]), position(0), + capacity(buffer.get() != nullptr ? initial_capacity : 0), + is_valid(buffer.get() != nullptr) {} - // - // 2. When a previous search did not yield a value or the object is empty: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 0) - // { } - // ^ (depth 0, index 2) - // ``` - // - } else if (!is_open()) { -#if SIMDJSON_DEVELOPMENT_CHECKS - // If we're past the end of the object, we're being iterated out of order. - // Note: this is not perfect detection. It's possible the user is inside some other object; if so, - // this object iterator will blithely scan that object for fields. - if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } -#endif +simdjson_inline bool string_builder::capacity_check(size_t upcoming_bytes) { + // We use the convention that when is_valid is false, then the capacity and + // the position are 0. + // Most of the time, this function will return true. + if (simdjson_likely(upcoming_bytes <= capacity - position)) { + return true; + } + // check for overflow, most of the time there is no overflow + if (simdjson_likely(position + upcoming_bytes < position)) { return false; + } + // We will rarely get here. + grow_buffer((std::max)(capacity * 2, position + upcoming_bytes)); + // If the buffer allocation failed, we set is_valid to false. + return is_valid; +} - // 3. When a previous search found a field or an iterator yielded a value: - // - // ``` - // // When a field was not fully consumed (or not even touched at all) - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2) - // // When a field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // // When the last field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // ``` - // +simdjson_inline void string_builder::grow_buffer(size_t desired_capacity) { + if (!is_valid) { + return; + } + std::unique_ptr new_buffer(new (std::nothrow) char[desired_capacity]); + if (new_buffer.get() == nullptr) { + set_valid(false); + return; + } + std::memcpy(new_buffer.get(), buffer.get(), position); + buffer.swap(new_buffer); + capacity = desired_capacity; +} + +simdjson_inline void string_builder::set_valid(bool valid) noexcept { + if (!valid) { + is_valid = false; + capacity = 0; + position = 0; + buffer.reset(); } else { - if ((error = skip_child() )) { abandon(); return error; } - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } -#if SIMDJSON_DEVELOPMENT_CHECKS - if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } -#endif + is_valid = true; } - while (has_value) { - // Get the key and colon, stopping at the value. - raw_json_string actual_key; - // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes - // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. - // field_key() advances the pointer and checks that '"' is found (corresponding to a key). - // The depth is left unchanged by field_key(). - if ((error = field_key().get(actual_key) )) { abandon(); return error; }; - // field_value() will advance and check that we find a ':' separating the - // key and the value. It will also increment the depth by one. - if ((error = field_value() )) { abandon(); return error; } - // If it matches, stop and return - // We could do it this way if we wanted to allow arbitrary - // key content (including escaped quotes). - //if (actual_key.unsafe_is_equal(max_key_length, key)) { - // Instead we do the following which may trigger buffer overruns if the - // user provides an adversarial key (containing a well placed unescaped quote - // character and being longer than the number of bytes remaining in the JSON - // input). - if (actual_key.unsafe_is_equal(key)) { - logger::log_event(*this, "match", key, -2); - // If we return here, then we return while pointing at the ':' that we just checked. - return true; - } +} - // No match: skip the value and see if , or } is next - logger::log_event(*this, "no match", key, -2); - // The call to skip_child is meant to skip over the value corresponding to the key. - // After skip_child(), we are right before the next comma (',') or the final brace ('}'). - SIMDJSON_TRY( skip_child() ); // Skip the value entirely - // The has_next_field() advances the pointer and check that either ',' or '}' is found. - // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, - // then we are in error and we abort. - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +simdjson_inline size_t string_builder::size() const noexcept { + return position; +} + +simdjson_inline void string_builder::append(char c) noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = c; } +} - // If the loop ended, we're out of fields to look at. - return false; +simdjson_inline void string_builder::append_null() noexcept { + constexpr char null_literal[] = "null"; + constexpr size_t null_len = sizeof(null_literal) - 1; + if (capacity_check(null_len)) { + std::memcpy(buffer.get() + position, null_literal, null_len); + position += null_len; + } } -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { - /** - * When find_field_unordered_raw is called, we can either be pointing at the - * first key, pointing outside (at the closing brace) or if a key was matched - * we can be either pointing right afterthe ':' right before the value (that we need skip), - * or we may have consumed the value and we might be at a comma or at the - * final brace (ready for a call to has_next_field()). - */ - error_code error; - bool has_value; +simdjson_inline void string_builder::clear() noexcept { + position = 0; + // if it was invalid, we should try to repair it + if (!is_valid) { + capacity = 0; + buffer.reset(); + is_valid = true; + } +} - // First, we scan from that point to the end. - // If we don't find a match, we may loop back around, and scan from the beginning to that point. - token_position search_start = _json_iter->position(); +namespace internal { - // We want to know whether we need to go back to the beginning. - bool at_first = at_first_field(); - /////////////// - // Initially, the object can be in one of a few different places: - // - // 1. At the first key: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2, index 1) - // ``` - // - if (at_first) { - has_value = true; +template ::value>::type> +simdjson_really_inline int int_log2(number_type x) { + return 63 - leading_zeroes(uint64_t(x) | 1); +} + +simdjson_really_inline int fast_digit_count_32(uint32_t x) { + static uint64_t table[] = { + 4294967296, 8589934582, 8589934582, 8589934582, 12884901788, + 12884901788, 12884901788, 17179868184, 17179868184, 17179868184, + 21474826480, 21474826480, 21474826480, 21474826480, 25769703776, + 25769703776, 25769703776, 30063771072, 30063771072, 30063771072, + 34349738368, 34349738368, 34349738368, 34349738368, 38554705664, + 38554705664, 38554705664, 41949672960, 41949672960, 41949672960, + 42949672960, 42949672960}; + return uint32_t((x + table[int_log2(x)]) >> 32); +} + +simdjson_really_inline int fast_digit_count_64(uint64_t x) { + static uint64_t table[] = {9, + 99, + 999, + 9999, + 99999, + 999999, + 9999999, + 99999999, + 999999999, + 9999999999, + 99999999999, + 999999999999, + 9999999999999, + 99999999999999, + 999999999999999ULL, + 9999999999999999ULL, + 99999999999999999ULL, + 999999999999999999ULL, + 9999999999999999999ULL}; + int y = (19 * int_log2(x) >> 6); + y += x > table[y]; + return y + 1; +} + +template ::value>::type> +simdjson_really_inline size_t digit_count(number_type v) noexcept { + static_assert(sizeof(number_type) == 8 || sizeof(number_type) == 4 || + sizeof(number_type) == 2 || sizeof(number_type) == 1, + "We only support 8-bit, 16-bit, 32-bit and 64-bit numbers"); + SIMDJSON_IF_CONSTEXPR(sizeof(number_type) <= 4) { + return fast_digit_count_32(static_cast(v)); + } + else { + return fast_digit_count_64(static_cast(v)); + } +} +static const char decimal_table[200] = { + 0x30, 0x30, 0x30, 0x31, 0x30, 0x32, 0x30, 0x33, 0x30, 0x34, 0x30, 0x35, + 0x30, 0x36, 0x30, 0x37, 0x30, 0x38, 0x30, 0x39, 0x31, 0x30, 0x31, 0x31, + 0x31, 0x32, 0x31, 0x33, 0x31, 0x34, 0x31, 0x35, 0x31, 0x36, 0x31, 0x37, + 0x31, 0x38, 0x31, 0x39, 0x32, 0x30, 0x32, 0x31, 0x32, 0x32, 0x32, 0x33, + 0x32, 0x34, 0x32, 0x35, 0x32, 0x36, 0x32, 0x37, 0x32, 0x38, 0x32, 0x39, + 0x33, 0x30, 0x33, 0x31, 0x33, 0x32, 0x33, 0x33, 0x33, 0x34, 0x33, 0x35, + 0x33, 0x36, 0x33, 0x37, 0x33, 0x38, 0x33, 0x39, 0x34, 0x30, 0x34, 0x31, + 0x34, 0x32, 0x34, 0x33, 0x34, 0x34, 0x34, 0x35, 0x34, 0x36, 0x34, 0x37, + 0x34, 0x38, 0x34, 0x39, 0x35, 0x30, 0x35, 0x31, 0x35, 0x32, 0x35, 0x33, + 0x35, 0x34, 0x35, 0x35, 0x35, 0x36, 0x35, 0x37, 0x35, 0x38, 0x35, 0x39, + 0x36, 0x30, 0x36, 0x31, 0x36, 0x32, 0x36, 0x33, 0x36, 0x34, 0x36, 0x35, + 0x36, 0x36, 0x36, 0x37, 0x36, 0x38, 0x36, 0x39, 0x37, 0x30, 0x37, 0x31, + 0x37, 0x32, 0x37, 0x33, 0x37, 0x34, 0x37, 0x35, 0x37, 0x36, 0x37, 0x37, + 0x37, 0x38, 0x37, 0x39, 0x38, 0x30, 0x38, 0x31, 0x38, 0x32, 0x38, 0x33, + 0x38, 0x34, 0x38, 0x35, 0x38, 0x36, 0x38, 0x37, 0x38, 0x38, 0x38, 0x39, + 0x39, 0x30, 0x39, 0x31, 0x39, 0x32, 0x39, 0x33, 0x39, 0x34, 0x39, 0x35, + 0x39, 0x36, 0x39, 0x37, 0x39, 0x38, 0x39, 0x39, +}; +} // namespace internal - // 2. When a previous search did not yield a value or the object is empty: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 0) - // { } - // ^ (depth 0, index 2) - // ``` - // - } else if (!is_open()) { +template +simdjson_inline void string_builder::append(number_type v) noexcept { + static_assert(std::is_same::value || + std::is_integral::value || + std::is_floating_point::value, + "Unsupported number type"); + // If C++17 is available, we can 'if constexpr' here. + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + if (v) { + constexpr char true_literal[] = "true"; + constexpr size_t true_len = sizeof(true_literal) - 1; + if (capacity_check(true_len)) { + std::memcpy(buffer.get() + position, true_literal, true_len); + position += true_len; + } + } else { + constexpr char false_literal[] = "false"; + constexpr size_t false_len = sizeof(false_literal) - 1; + if (capacity_check(false_len)) { + std::memcpy(buffer.get() + position, false_literal, false_len); + position += false_len; + } + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_unsigned::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + unsigned_type pv = static_cast(v); + size_t dc = internal::digit_count(pv); + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100) * 2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_integral::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + bool negative = v < 0; + unsigned_type pv = static_cast(v); + if (negative) { + pv = 0 - pv; // the 0 is for Microsoft + } + size_t dc = internal::digit_count(pv); + // by always writing the minus sign, we avoid the branch. + buffer.get()[position] = '-'; + position += negative ? 1 : 0; + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100) * 2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_floating_point::value) { + constexpr size_t max_number_size = 24; + if (capacity_check(max_number_size)) { + // We could specialize for float. + char *end = simdjson::internal::to_chars(buffer.get() + position, nullptr, + double(v)); + position = end - buffer.get(); + } + } +} -#if SIMDJSON_DEVELOPMENT_CHECKS - // If we're past the end of the object, we're being iterated out of order. - // Note: this is not perfect detection. It's possible the user is inside some other object; if so, - // this object iterator will blithely scan that object for fields. - if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } -#endif - SIMDJSON_TRY(reset_object().get(has_value)); - at_first = true; - // 3. When a previous search found a field or an iterator yielded a value: - // - // ``` - // // When a field was not fully consumed (or not even touched at all) - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2) - // // When a field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // // When the last field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // ``` - // - } else { - // If someone queried a key but they not did access the value, then we are left pointing - // at the ':' and we need to move forward through the value... If the value was - // processed then skip_child() does not move the iterator (but may adjust the depth). - if ((error = skip_child() )) { abandon(); return error; } - search_start = _json_iter->position(); - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } -#if SIMDJSON_DEVELOPMENT_CHECKS - if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } -#endif +simdjson_inline void +string_builder::escape_and_append(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(6 * input.size())) { + position += write_string_escaped(input, buffer.get() + position); } +} - // After initial processing, we will be in one of two states: - // - // ``` - // // At the beginning of a field - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // // At the end of the object - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 0) - // ``` - // - // Next, we find a match starting from the current position. - while (has_value) { - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field +simdjson_inline void +string_builder::escape_and_append_with_quotes(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * input.size())) { + buffer.get()[position++] = '"'; + position += write_string_escaped(input, buffer.get() + position); + buffer.get()[position++] = '"'; + } +} - // Get the key and colon, stopping at the value. - raw_json_string actual_key; - // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes - // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. - // field_key() advances the pointer and checks that '"' is found (corresponding to a key). - // The depth is left unchanged by field_key(). - if ((error = field_key().get(actual_key) )) { abandon(); return error; }; - // field_value() will advance and check that we find a ':' separating the - // key and the value. It will also increment the depth by one. - if ((error = field_value() )) { abandon(); return error; } +simdjson_inline void +string_builder::escape_and_append_with_quotes(char input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * 1)) { + buffer.get()[position++] = '"'; + std::string_view cinput(&input, 1); + position += write_string_escaped(cinput, buffer.get() + position); + buffer.get()[position++] = '"'; + } +} - // If it matches, stop and return - // We could do it this way if we wanted to allow arbitrary - // key content (including escaped quotes). - // if (actual_key.unsafe_is_equal(max_key_length, key)) { - // Instead we do the following which may trigger buffer overruns if the - // user provides an adversarial key (containing a well placed unescaped quote - // character and being longer than the number of bytes remaining in the JSON - // input). - if (actual_key.unsafe_is_equal(key)) { - logger::log_event(*this, "match", key, -2); - // If we return here, then we return while pointing at the ':' that we just checked. - return true; - } +simdjson_inline void +string_builder::escape_and_append_with_quotes(const char *input) noexcept { + std::string_view cinput(input); + escape_and_append_with_quotes(cinput); +} +#if SIMDJSON_SUPPORTS_CONCEPTS +template +simdjson_inline void string_builder::escape_and_append_with_quotes() noexcept { + escape_and_append_with_quotes(constevalutil::string_constant::value); +} +#endif - // No match: skip the value and see if , or } is next - logger::log_event(*this, "no match", key, -2); - // The call to skip_child is meant to skip over the value corresponding to the key. - // After skip_child(), we are right before the next comma (',') or the final brace ('}'). - SIMDJSON_TRY( skip_child() ); - // The has_next_field() advances the pointer and check that either ',' or '}' is found. - // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, - // then we are in error and we abort. - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +simdjson_inline void string_builder::append_raw(const char *c) noexcept { + size_t len = std::strlen(c); + append_raw(c, len); +} + +simdjson_inline void +string_builder::append_raw(std::string_view input) noexcept { + if (capacity_check(input.size())) { + std::memcpy(buffer.get() + position, input.data(), input.size()); + position += input.size(); } - // Performance note: it maybe wasteful to rewind to the beginning when there might be - // no other query following. Indeed, it would require reskipping the whole object. - // Instead, you can just stay where you are. If there is a new query, there is always time - // to rewind. - if(at_first) { return false; } +} - // If we reach the end without finding a match, search the rest of the fields starting at the - // beginning of the object. - // (We have already run through the object before, so we've already validated its structure. We - // don't check errors in this bit.) - SIMDJSON_TRY(reset_object().get(has_value)); - while (true) { - SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field +simdjson_inline void string_builder::append_raw(const char *str, + size_t len) noexcept { + if (capacity_check(len)) { + std::memcpy(buffer.get() + position, str, len); + position += len; + } +} +#if SIMDJSON_SUPPORTS_CONCEPTS +// Support for optional types (std::optional, etc.) +template + requires(!require_custom_serialization) +simdjson_inline void string_builder::append(const T &opt) { + if (opt) { + append(*opt); + } else { + append_null(); + } +} + +template + requires(require_custom_serialization) +simdjson_inline void string_builder::append(T &&val) { + serialize(*this, std::forward(val)); +} - // Get the key and colon, stopping at the value. - raw_json_string actual_key; - // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes - // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. - // field_key() advances the pointer and checks that '"' is found (corresponding to a key). - // The depth is left unchanged by field_key(). - error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); - // field_value() will advance and check that we find a ':' separating the - // key and the value. It will also increment the depth by one. - error = field_value(); SIMDJSON_ASSUME(!error); +template + requires(std::is_convertible::value || + std::is_same::value) +simdjson_inline void string_builder::append(const T &value) { + escape_and_append_with_quotes(value); +} +#endif - // If it matches, stop and return - // We could do it this way if we wanted to allow arbitrary - // key content (including escaped quotes). - // if (actual_key.unsafe_is_equal(max_key_length, key)) { - // Instead we do the following which may trigger buffer overruns if the - // user provides an adversarial key (containing a well placed unescaped quote - // character and being longer than the number of bytes remaining in the JSON - // input). - if (actual_key.unsafe_is_equal(key)) { - logger::log_event(*this, "match", key, -2); - // If we return here, then we return while pointing at the ':' that we just checked. - return true; +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS +// Support for range-based appending (std::ranges::view, etc.) +template + requires(!std::is_convertible::value && !require_custom_serialization) +simdjson_inline void string_builder::append(const R &range) noexcept { + auto it = std::ranges::begin(range); + auto end = std::ranges::end(range); + if constexpr (concepts::is_pair>) { + start_object(); + + if (it == end) { + end_object(); + return; // Handle empty range } + // Append first item without leading comma + append_key_value(it->first, it->second); + ++it; - // No match: skip the value and see if , or } is next - logger::log_event(*this, "no match", key, -2); - // The call to skip_child is meant to skip over the value corresponding to the key. - // After skip_child(), we are right before the next comma (',') or the final brace ('}'). - SIMDJSON_TRY( skip_child() ); - // If we reached the end of the key-value pair we started from, then we know - // that the key is not there so we return false. We are either right before - // the next comma or the final brace. - if(_json_iter->position() == search_start) { return false; } - // The has_next_field() advances the pointer and check that either ',' or '}' is found. - // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, - // then we are in error and we abort. - error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); - // If we make the mistake of exiting here, then we could be left pointing at a key - // in the middle of an object. That's not an allowable state. + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append_key_value(it->first, it->second); + } + end_object(); + } else { + start_array(); + if (it == end) { + end_array(); + return; // Handle empty range + } + + // Append first item without leading comma + append(*it); + ++it; + + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append(*it); + } + end_array(); } - // If the loop ended, we're out of fields to look at. The program should - // never reach this point. - return false; } -SIMDJSON_POP_DISABLE_WARNINGS -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { - assert_at_next(); +#endif - const uint8_t *key = _json_iter->return_current_and_advance(); - if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } - return raw_json_string(key); +#if SIMDJSON_EXCEPTIONS +simdjson_inline string_builder::operator std::string() const noexcept(false) { + return std::string(operator std::string_view()); } -simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { - assert_at_next(); +simdjson_inline string_builder::operator std::string_view() const + noexcept(false) simdjson_lifetime_bound { + return view(); +} +#endif - if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } - _json_iter->descend_to(depth()+1); - return SUCCESS; +simdjson_inline simdjson_result +string_builder::view() const noexcept { + if (!is_valid) { + return simdjson::OUT_OF_CAPACITY; + } + return std::string_view(buffer.get(), position); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { - SIMDJSON_TRY( start_container('[', "Not an array", "array") ); - return started_array(); +simdjson_inline simdjson_result string_builder::c_str() noexcept { + if (capacity_check(1)) { + buffer.get()[position] = '\0'; + return buffer.get(); + } + return simdjson::OUT_OF_CAPACITY; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { - SIMDJSON_TRY( start_container('[', "Not an array", "array") ); - return started_root_array(); +simdjson_inline bool string_builder::validate_unicode() const noexcept { + return simdjson::validate_utf8(buffer.get(), position); } -inline std::string value_iterator::to_string() const noexcept { - auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); - if(_json_iter != nullptr) { answer += _json_iter->to_string(); } - answer += std::string(" ]"); - return answer; +simdjson_inline void string_builder::start_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '{'; + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { - assert_at_container_start(); - if (*_json_iter->peek() == ']') { - logger::log_value(*_json_iter, "empty array"); - _json_iter->return_current_and_advance(); - SIMDJSON_TRY( end_container() ); - return false; +simdjson_inline void string_builder::end_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '}'; } - _json_iter->descend_to(depth()+1); -#if SIMDJSON_DEVELOPMENT_CHECKS - _json_iter->set_start_position(_depth, start_position()); -#endif - return true; } -simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { - // When in streaming mode, we cannot expect peek_last() to be the last structural element of the - // current document. It only works in the normal mode where we have indexed a single document. - // Note that adding a check for 'streaming' is not expensive since we only have at most - // one root element. - if ( ! _json_iter->streaming() ) { - // The following lines do not fully protect against garbage content within the - // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should - // also call `at_end()` on the document instance at the end of the processing to - // ensure that the processing has finished at the end. - // - if (*_json_iter->peek_last() != ']') { - _json_iter->abandon(); - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); - } - // If the last character is ] *and* the first gibberish character is also ']' - // then on-demand could accidentally go over. So we need additional checks. - // https://github.com/simdjson/simdjson/issues/1834 - // Checking that the document is balanced requires a full scan which is potentially - // expensive, but it only happens in edge cases where the first padding character is - // a closing bracket. - if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { - _json_iter->abandon(); - // The exact error would require more work. It will typically be an unclosed array. - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); - } +simdjson_inline void string_builder::start_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '['; } - return SUCCESS; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { - auto error = check_root_array(); - if (error) { return error; } - return started_array(); +simdjson_inline void string_builder::end_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ']'; + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { - assert_at_next(); +simdjson_inline void string_builder::append_comma() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ','; + } +} - logger::log_event(*this, "has_next_element"); - switch (*_json_iter->return_current_and_advance()) { - case ']': - logger::log_end_value(*_json_iter, "array"); - SIMDJSON_TRY( end_container() ); - return false; - case ',': - _json_iter->descend_to(depth()+1); - return true; - default: - return report_error(TAPE_ERROR, "Missing comma between array elements"); +simdjson_inline void string_builder::append_colon() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ':'; } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { - auto not_true = atomparsing::str4ncmp(json, "true"); - auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); - bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 5 : 4]); - if (error) { return incorrect_type_error("Not a boolean"); } - return simdjson_result(!not_true); +template +simdjson_inline void +string_builder::append_key_value(key_type key, value_type value) noexcept { + static_assert(std::is_same::value || + std::is_convertible::value, + "Unsupported key type"); + escape_and_append_with_quotes(key); + append_colon(); + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + append_null(); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR( + std::is_convertible::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else { + append(value); + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { - bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); - // if we start with 'n', we must be a null - if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } - return is_null_string; + +#if SIMDJSON_SUPPORTS_CONCEPTS +template +simdjson_inline void +string_builder::append_key_value(value_type value) noexcept { + escape_and_append_with_quotes(); + append_colon(); + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + append_null(); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR( + std::is_convertible::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else { + append(value); + } } +#endif -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { - return get_raw_json_string().unescape(json_iter(), allow_replacement); +} // namespace builder +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_STRING_BUILDER_INL_H +/* end file simdjson/generic/ondemand/json_string_builder-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/json_builder.h for ppc64: #include "simdjson/generic/ondemand/json_builder.h" */ +/* begin file simdjson/generic/ondemand/json_builder.h for ppc64 */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/concepts.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#if SIMDJSON_STATIC_REFLECTION + +#include +#include +#include +#include +#include +#include +#include +#include +// #include // for std::define_static_string - header not available yet + +namespace simdjson { +namespace ppc64 { +namespace builder { + +template + requires(concepts::container_but_not_string && !require_custom_serialization) +constexpr void atom(string_builder &b, const T &t) { + auto it = t.begin(); + auto end = t.end(); + if (it == end) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, *it); + ++it; + for (; it != end; ++it) { + b.append(','); + atom(b, *it); + } + b.append(']'); } -template -simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept { - std::string_view content; - auto err = get_string(allow_replacement).get(content); - if (err) { return err; } - receiver = content; - return SUCCESS; + +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +constexpr void atom(string_builder &b, const T &t) { + b.escape_and_append_with_quotes(t); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { - return get_raw_json_string().unescape_wobbly(json_iter()); + +template + requires(!require_custom_serialization) +constexpr void atom(string_builder &b, const T &m) { + if (m.empty()) { + b.append_raw("{}"); + return; + } + b.append('{'); + bool first = true; + for (const auto& [key, value] : m) { + if (!first) { + b.append(','); + } + first = false; + // Keys must be convertible to string_view per the concept + b.escape_and_append_with_quotes(key); + b.append(':'); + atom(b, value); + } + b.append('}'); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { - auto json = peek_scalar("string"); - if (*json != '"') { return incorrect_type_error("Not a string"); } - advance_scalar("string"); - return raw_json_string(json+1); + + +template::value && !std::is_same_v>::type> +constexpr void atom(string_builder &b, const number_type t) { + b.append(t); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { - auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } - return result; + +template + requires(std::is_class_v && !concepts::container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && !require_custom_serialization) +constexpr void atom(string_builder &b, const T &t) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, t.[:dm:]); + i++; + }; + b.append('}'); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { - auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } - return result; + +// Support for optional types (std::optional, etc.) +template + requires(!require_custom_serialization) +constexpr void atom(string_builder &b, const T &opt) { + if (opt) { + atom(b, opt.value()); + } else { + b.append_raw("null"); + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { - auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } - return result; + +// Support for smart pointers (std::unique_ptr, std::shared_ptr, etc.) +template + requires(!require_custom_serialization) +constexpr void atom(string_builder &b, const T &ptr) { + if (ptr) { + atom(b, *ptr); + } else { + b.append_raw("null"); + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { - auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } - return result; + +// Support for enums - serialize as string representation using expand approach from P2996R12 +template + requires(std::is_enum_v && !require_custom_serialization) +void atom(string_builder &b, const T &e) { +#if SIMDJSON_STATIC_REFLECTION + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + constexpr auto enum_str = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(enum_val))); + if (e == [:enum_val:]) { + b.append_raw(enum_str); + return; + } + }; + // Fallback to integer if enum value not found + atom(b, static_cast>(e)); +#else + // Fallback: serialize as integer if reflection not available + atom(b, static_cast>(e)); +#endif } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { - auto result = numberparsing::parse_double(peek_non_root_scalar("double")); - if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } - return result; + +// Support for appendable containers that don't have operator[] (sets, etc.) +template + requires(!concepts::container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v && !require_custom_serialization) +constexpr void atom(string_builder &b, const T &container) { + if (container.empty()) { + b.append_raw("[]"); + return; + } + b.append('['); + bool first = true; + for (const auto& item : container) { + if (!first) { + b.append(','); + } + first = false; + atom(b, item); + } + b.append(']'); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { - auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); - if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } - return result; + +// append functions that delegate to atom functions for primitive types +template + requires(std::is_arithmetic_v && !std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { - auto result = parse_bool(peek_non_root_scalar("bool")); - if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } - return result; + +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_inline simdjson_result value_iterator::is_null() noexcept { - bool is_null_value; - SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); - if(is_null_value) { advance_non_root_scalar("null"); } - return is_null_value; + +template + requires(!require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_inline bool value_iterator::is_negative() noexcept { - return numberparsing::is_negative(peek_non_root_scalar("numbersign")); + +template + requires(!require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_inline bool value_iterator::is_root_negative() noexcept { - return numberparsing::is_negative(peek_root_scalar("numbersign")); + +template + requires(!concepts::container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v && !require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_inline simdjson_result value_iterator::is_integer() noexcept { - return numberparsing::is_integer(peek_non_root_scalar("integer")); + +template + requires(!require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +// works for struct +template + requires(std::is_class_v && !concepts::container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && !require_custom_serialization) +void append(string_builder &b, const Z &z) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^Z, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, z.[:dm:]); + i++; + }; + b.append('}'); } -simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { - return numberparsing::get_number_type(peek_non_root_scalar("integer")); + +// works for container that have begin() and end() iterators +template + requires(concepts::container_but_not_string && !require_custom_serialization) +void append(string_builder &b, const Z &z) { + auto it = z.begin(); + auto end = z.end(); + if (it == end) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, *it); + ++it; + for (; it != end; ++it) { + b.append(','); + atom(b, *it); + } + b.append(']'); } -simdjson_inline simdjson_result value_iterator::get_number() noexcept { - number num; - error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); - if(error) { return error; } - return num; + +template + requires (require_custom_serialization) +void append(string_builder &b, const Z &z) { + b.append(z); } -simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("is_root_integer"); - uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - return false; // if there are more than 20 characters, it cannot be represented as an integer. - } - auto answer = numberparsing::is_integer(tmpbuf); - // If the parsing was a success, we must still check that it is - // a single scalar. Note that we parse first because of cases like '[]' where - // getting TRAILING_CONTENT is wrong. - if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } - return answer; + +template +simdjson_warn_unused simdjson_result to_json_string(const Z &z, size_t initial_capacity = string_builder::DEFAULT_INITIAL_CAPACITY) { + string_builder b(initial_capacity); + append(b, z); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); } -simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("number"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - uint8_t tmpbuf[1074+8+1+1]; - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - if(numberparsing::check_if_integer(json, max_len)) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); - return number_type::big_integer; - } - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); - return NUMBER_ERROR; - } - auto answer = numberparsing::get_number_type(tmpbuf); - if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - return answer; +template +simdjson_warn_unused simdjson_error to_json(const Z &z, std::string &s, size_t initial_capacity = string_builder::DEFAULT_INITIAL_CAPACITY) { + string_builder b(initial_capacity); + append(b, z); + std::string_view view; + if(auto e = b.view().get(view); e) { return e; } + s.assign(view); + return SUCCESS; } -simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("number"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - // NOTE: the current approach doesn't work for very big integer numbers containing more than 1074 digits. - uint8_t tmpbuf[1074+8+1+1]; - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - if(numberparsing::check_if_integer(json, max_len)) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); - return BIGINT_ERROR; + +template +string_builder& operator<<(string_builder& b, const Z& z) { + append(b, z); + return b; +} + +// extract_from: Serialize only specific fields from a struct to JSON +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +void extract_from(string_builder &b, const T &obj) { + // Helper to check if a field name matches any of the requested fields + auto should_extract = [](std::string_view field_name) constexpr -> bool { + return ((FieldNames.view() == field_name) || ...); + }; + + b.append('{'); + bool first = true; + + // Iterate through all members of T using reflection + template for (constexpr auto mem : std::define_static_array( + std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + + if constexpr (std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + + // Only serialize this field if it's in our list of requested fields + if constexpr (should_extract(key)) { + if (!first) { + b.append(','); + } + first = false; + + // Serialize the key + constexpr auto quoted_key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(mem))); + b.append_raw(quoted_key); + b.append(':'); + + // Serialize the value + atom(b, obj.[:mem:]); + } } - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); - return NUMBER_ERROR; - } - number num; - error_code error = numberparsing::parse_number(tmpbuf, num); - if(error) { return error; } - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("number"); - return num; + }; + + b.append('}'); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { - return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); + +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_result extract_from(const T &obj, size_t initial_capacity = string_builder::DEFAULT_INITIAL_CAPACITY) { + string_builder b(initial_capacity); + extract_from(b, obj); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); } -template -simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept { - std::string_view content; - auto err = get_root_string(check_trailing, allow_replacement).get(content); - if (err) { return err; } - receiver = content; + +} // namespace builder +} // namespace ppc64 +// Alias the function template to 'to' in the global namespace +template +simdjson_warn_unused simdjson_result to_json(const Z &z, size_t initial_capacity = ppc64::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + ppc64::builder::string_builder b(initial_capacity); + ppc64::builder::append(b, z); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); +} +template +simdjson_warn_unused simdjson_error to_json(const Z &z, std::string &s, size_t initial_capacity = ppc64::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + ppc64::builder::string_builder b(initial_capacity); + ppc64::builder::append(b, z); + std::string_view view; + if(auto e = b.view().get(view); e) { return e; } + s.assign(view); return SUCCESS; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { - return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); +// Global namespace function for extract_from +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_result extract_from(const T &obj, size_t initial_capacity = ppc64::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + ppc64::builder::string_builder b(initial_capacity); + ppc64::builder::extract_from(b, obj); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { - auto json = peek_scalar("string"); - if (*json != '"') { return incorrect_type_error("Not a string"); } - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_scalar("string"); - return raw_json_string(json+1); + +} // namespace simdjson + +#endif // SIMDJSON_STATIC_REFLECTION + +#endif +/* end file simdjson/generic/ondemand/json_builder.h for ppc64 */ + +// JSON path accessor (compile-time) - must be after inline definitions +/* including simdjson/generic/ondemand/compile_time_accessors.h for ppc64: #include "simdjson/generic/ondemand/compile_time_accessors.h" */ +/* begin file simdjson/generic/ondemand/compile_time_accessors.h for ppc64 */ +/** + * Compile-time JSON Path and JSON Pointer accessors using C++26 reflection (P2996) + * + * This file validates JSON paths/pointers against struct definitions at compile time + * and generates optimized accessor code with zero runtime overhead. + * + * ## How It Works + * + * **Compile Time**: Path is parsed, validated against struct, types are checked + * **Runtime**: Direct navigation with no parsing or validation overhead + * + * Example: + * ```cpp + * struct User { std::string name; std::vector emails; }; + * + * std::string email; + * path_accessor::extract_field(doc, email); + * + * // Compile time validates: + * // 1. User has "emails" field + * // 2. "emails" is array-like + * // 3. Element type is std::string + * // 4. static_assert(^^std::string == ^^std::string) + * + * // Runtime just navigates: + * // doc.get_object().find_field("emails").get_array().at(0).get(email) + * ``` + * + * ## Key Reflection APIs + * + * - `^^Type`: Reflect operator, converts type to std::meta::info + * - `std::meta::nonstatic_data_members_of(type)`: Get all fields of a struct + * - `std::meta::identifier_of(member)`: Get field name as string_view + * - `std::meta::type_of(member)`: Get reflected type of a field + * - `std::meta::is_array_type(type)`: Check if C-style array + * - `std::meta::remove_extent(array)`: Extract element type from array + * - `std::meta::members_of(type)`: Get all members including typedefs + * - `std::meta::is_type(member)`: Check if member is a type (vs field) + * + * All operations execute at compile time in consteval contexts. + */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_COMPILE_TIME_ACCESSORS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_COMPILE_TIME_ACCESSORS_H */ +/* amalgamation skipped (editor-only): */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// Arguably, we should just check SIMDJSON_STATIC_REFLECTION since it +// is unlikely that we will have reflection support without concepts support. +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + +#include +#include +#include + +namespace simdjson { +namespace ppc64 { +namespace ondemand { +/*** + * JSONPath implementation for compile-time access + * RFC 9535 JSONPath: Query Expressions for JSON, https://www.rfc-editor.org/rfc/rfc9535 + */ +namespace json_path { + +// Note: value type must be fully defined before this header is included +// This is ensured by including this in amalgamated.h after value-inl.h + +using ::simdjson::ppc64::ondemand::value; + +// Path step types +enum class step_type { + field, // .field_name or ["field_name"] + array_index // [index] +}; + +// Represents a single step in a JSON path +template +struct path_step { + step_type type; + char key[N]; // Field name (empty for array indices) + std::size_t index; // Array index (0 for field access) + + constexpr path_step(step_type t, const char (&k)[N], std::size_t idx = 0) + : type(t), index(idx) { + for (std::size_t i = 0; i < N; ++i) { + key[i] = k[i]; + } + } + + constexpr std::string_view key_view() const { + return {key, N - 1}; + } +}; + +// Helper to create field step +template +consteval auto make_field_step(const char (&name)[N]) { + return path_step(step_type::field, name, 0); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("uint64"); - uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; + +// Helper to create array index step +consteval auto make_index_step(std::size_t idx) { + return path_step<1>(step_type::array_index, "", idx); +} + +// Parse state for compile-time JSON path parsing +struct parse_result { + bool success; + std::size_t pos; + std::string_view error_msg; +}; + +// Compile-time JSON path parser +// Supports subset: .field, ["field"], [index], nested combinations +template +struct json_path_parser { + static constexpr std::string_view path_str = Path.view(); + + // Skip leading $ if present + static consteval std::size_t skip_root() { + if (!path_str.empty() && path_str[0] == '$') { + return 1; + } + return 0; } - auto result = numberparsing::parse_unsigned(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("uint64"); + + // Count the number of steps in the path at compile time + static consteval std::size_t count_steps() { + std::size_t count = 0; + std::size_t i = skip_root(); + + while (i < path_str.size()) { + if (path_str[i] == '.') { + // Field access: .field + ++i; + if (i >= path_str.size()) break; + + // Skip field name + while (i < path_str.size() && path_str[i] != '.' && path_str[i] != '[') { + ++i; + } + ++count; + } else if (path_str[i] == '[') { + // Array or bracket notation + ++i; + if (i >= path_str.size()) break; + + if (path_str[i] == '"' || path_str[i] == '\'') { + // Field access: ["field"] or ['field'] + char quote = path_str[i]; + ++i; + while (i < path_str.size() && path_str[i] != quote) { + ++i; + } + if (i < path_str.size()) ++i; // skip closing quote + if (i < path_str.size() && path_str[i] == ']') ++i; + } else { + // Array index: [0], [123] + while (i < path_str.size() && path_str[i] != ']') { + ++i; + } + if (i < path_str.size()) ++i; // skip ] + } + ++count; + } else { + ++i; + } + } + + return count; + } + + // Parse a field name at compile time + static consteval std::size_t parse_field_name(std::size_t start, char* out, std::size_t max_len) { + std::size_t len = 0; + std::size_t i = start; + + while (i < path_str.size() && path_str[i] != '.' && path_str[i] != '[' && len < max_len - 1) { + out[len++] = path_str[i++]; + } + out[len] = '\0'; + return i; + } + + // Parse an array index at compile time + static consteval std::pair parse_array_index(std::size_t start) { + std::size_t index = 0; + std::size_t i = start; + + while (i < path_str.size() && path_str[i] >= '0' && path_str[i] <= '9') { + index = index * 10 + (path_str[i] - '0'); + ++i; + } + + return {i, index}; + } +}; + +// Compile-time path accessor generator +template +struct path_accessor { + using value = ::simdjson::ppc64::ondemand::value; + + static constexpr auto parser = json_path_parser(); + static constexpr std::size_t num_steps = parser.count_steps(); + static constexpr std::string_view path_view = Path.view(); + + // Compile-time accessor generation + // If T is a struct, validates the path at compile time + // If T is void, skips validation + template + static inline simdjson_result access(DocOrValue& doc_or_val) noexcept { + // Validate path at compile time if T is a struct + if constexpr (std::is_class_v) { + constexpr bool path_valid = validate_path(); + static_assert(path_valid, "JSON path does not match struct definition"); + } + + // Parse the path at compile time to build access steps + return access_impl(doc_or_val.get_value()); + } + + // Extract value at path directly into target with compile-time type validation + // Example: std::string name; path_accessor::extract_field(doc, name); + template + static inline error_code extract_field(DocOrValue& doc_or_val, FieldType& target) noexcept { + static_assert(std::is_class_v, "extract_field requires T to be a struct type for validation"); + + // Validate path exists in struct definition + constexpr bool path_valid = validate_path(); + static_assert(path_valid, "JSON path does not match struct definition"); + + // Get the type at the end of the path + constexpr auto final_type = get_final_type(); + + // Verify target type matches the field type + static_assert(final_type == ^^FieldType, "Target type does not match the field type at the path"); + + // All validation done at compile time - just navigate and extract + auto json_value = access_impl(doc_or_val.get_value()); + if (json_value.error()) return json_value.error(); + + return json_value.get(target); + } + +private: + // Get the final type by walking the path through the struct type + template + static consteval std::enable_if_t, std::meta::info> get_final_type() { + auto current_type = ^^T; + std::size_t i = parser.skip_root(); + + while (i < path_view.size()) { + if (path_view[i] == '.') { + // .field syntax + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != '.' && path_view[i] != '[') { + ++i; + } + + std::string_view field_name = path_view.substr(field_start, i - field_start); + + auto members = std::meta::nonstatic_data_members_of( + current_type, std::meta::access_context::unchecked() + ); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + break; + } + } + + } else if (path_view[i] == '[') { + ++i; + if (i >= path_view.size()) break; + + if (path_view[i] == '"' || path_view[i] == '\'') { + // ["field"] syntax + char quote = path_view[i]; + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != quote) { + ++i; + } + + std::string_view field_name = path_view.substr(field_start, i - field_start); + if (i < path_view.size()) ++i; // skip quote + if (i < path_view.size() && path_view[i] == ']') ++i; + + auto members = std::meta::nonstatic_data_members_of( + current_type, std::meta::access_context::unchecked() + ); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + break; + } + } + + } else { + // [index] syntax - extract element type + while (i < path_view.size() && path_view[i] >= '0' && path_view[i] <= '9') { + ++i; + } + if (i < path_view.size() && path_view[i] == ']') ++i; + + current_type = get_element_type_reflected(current_type); + } + } else { + ++i; + } + } + + return current_type; } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("uint64"); - uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; + +private: + // Walk path and extract directly into final field using compile-time reflection + template + static inline error_code extract_with_reflection(simdjson_result current, TargetType& target_ref) noexcept { + if (current.error()) return current.error(); + + // Base case: end of path - extract into target + if constexpr (PathPos >= path_view.size()) { + return current.get(target_ref); + } + // Field access: .field_name + else if constexpr (path_view[PathPos] == '.') { + constexpr auto field_info = parse_next_field(PathPos); + constexpr std::string_view field_name = std::get<0>(field_info); + constexpr std::size_t next_pos = std::get<1>(field_info); + + constexpr auto member_info = find_member_by_name(CurrentType, field_name); + static_assert(member_info != ^^void, "Field not found in struct"); + + constexpr auto member_type = std::meta::type_of(member_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + auto obj = obj_result.value_unsafe(); + auto field_value = obj.find_field_unordered(field_name); + + if constexpr (next_pos >= path_view.size()) { + return field_value.get(target_ref); + } else { + return extract_with_reflection(field_value, target_ref); + } + } + // Bracket notation: [index] or ["field"] + else if constexpr (path_view[PathPos] == '[') { + constexpr auto bracket_info = parse_bracket(PathPos); + constexpr bool is_field = std::get<0>(bracket_info); + constexpr std::size_t next_pos = std::get<2>(bracket_info); + + if constexpr (is_field) { + constexpr std::string_view field_name = std::get<1>(bracket_info); + constexpr auto member_info = find_member_by_name(CurrentType, field_name); + static_assert(member_info != ^^void, "Field not found in struct"); + constexpr auto member_type = std::meta::type_of(member_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + auto obj = obj_result.value_unsafe(); + auto field_value = obj.find_field_unordered(field_name); + + if constexpr (next_pos >= path_view.size()) { + return field_value.get(target_ref); + } else { + return extract_with_reflection(field_value, target_ref); + } + } else { + constexpr std::size_t index = std::get<3>(bracket_info); + constexpr auto elem_type = get_element_type_reflected(CurrentType); + static_assert(elem_type != ^^void, "Could not determine array element type"); + + auto arr_result = current.get_array(); + if (arr_result.error()) return arr_result.error(); + auto arr = arr_result.value_unsafe(); + auto elem_value = arr.at(index); + + if constexpr (next_pos >= path_view.size()) { + return elem_value.get(target_ref); + } else { + return extract_with_reflection(elem_value, target_ref); + } + } + } + // Skip unexpected characters and continue + else { + return extract_with_reflection(current, target_ref); + } } - auto result = numberparsing::parse_unsigned_in_string(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("uint64"); + + // Find member by name in reflected type + static consteval std::meta::info find_member_by_name(std::meta::info type_refl, std::string_view name) { + auto members = std::meta::nonstatic_data_members_of(type_refl, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::identifier_of(mem) == name) { + return mem; + } + } } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("int64"); - uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; + + // Generate compile-time accessor code by walking the path + template + static inline simdjson_result access_impl(simdjson_result current) noexcept { + if (current.error()) return current; + + if constexpr (PathPos >= path_view.size()) { + return current; + } else if constexpr (path_view[PathPos] == '.') { + constexpr auto field_info = parse_next_field(PathPos); + constexpr std::string_view field_name = std::get<0>(field_info); + constexpr std::size_t next_pos = std::get<1>(field_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + + auto obj = obj_result.value_unsafe(); + auto next_value = obj.find_field_unordered(field_name); + + return access_impl(next_value); + + } else if constexpr (path_view[PathPos] == '[') { + constexpr auto bracket_info = parse_bracket(PathPos); + constexpr bool is_field = std::get<0>(bracket_info); + constexpr std::size_t next_pos = std::get<2>(bracket_info); + + if constexpr (is_field) { + constexpr std::string_view field_name = std::get<1>(bracket_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + + auto obj = obj_result.value_unsafe(); + auto next_value = obj.find_field_unordered(field_name); + + return access_impl(next_value); + + } else { + constexpr std::size_t index = std::get<3>(bracket_info); + + auto arr_result = current.get_array(); + if (arr_result.error()) return arr_result.error(); + + auto arr = arr_result.value_unsafe(); + auto next_value = arr.at(index); + + return access_impl(next_value); + } + } else { + return access_impl(current); + } } - auto result = numberparsing::parse_integer(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("int64"); + // Parse next field name + static consteval auto parse_next_field(std::size_t start) { + std::size_t i = start + 1; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != '.' && path_view[i] != '[') { + ++i; + } + std::string_view field_name = path_view.substr(field_start, i - field_start); + return std::make_tuple(field_name, i); } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("int64"); - uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; + + // Parse bracket notation: returns (is_field, field_name, next_pos, index) + static consteval auto parse_bracket(std::size_t start) { + std::size_t i = start + 1; // skip '[' + + if (i < path_view.size() && (path_view[i] == '"' || path_view[i] == '\'')) { + // Field access + char quote = path_view[i]; + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != quote) { + ++i; + } + std::string_view field_name = path_view.substr(field_start, i - field_start); + if (i < path_view.size()) ++i; // skip closing quote + if (i < path_view.size() && path_view[i] == ']') ++i; + + return std::make_tuple(true, field_name, i, std::size_t(0)); + } else { + // Array index + std::size_t index = 0; + while (i < path_view.size() && path_view[i] >= '0' && path_view[i] <= '9') { + index = index * 10 + (path_view[i] - '0'); + ++i; + } + if (i < path_view.size() && path_view[i] == ']') ++i; + + return std::make_tuple(false, std::string_view{}, i, index); + } } - auto result = numberparsing::parse_integer_in_string(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("int64"); +public: + // Check if reflected type is array-like (C-style array or indexable container) + // Uses reflection to test: 1) std::meta::is_array_type() for C arrays + // 2) std::meta::substitute() to test concepts::indexable_container concept + static consteval bool is_array_like_reflected(std::meta::info type_reflection) { + if (std::meta::is_array_type(type_reflection)) { + return true; + } + + if (std::meta::can_substitute(^^concepts::indexable_container_v, {type_reflection})) { + return std::meta::extract(std::meta::substitute(^^concepts::indexable_container_v, {type_reflection})); + } + return false; } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("double"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); - return NUMBER_ERROR; + + // Extract element type from reflected array or container + // For C arrays: uses std::meta::remove_extent() + // For containers: finds value_type member using std::meta::members_of() + static consteval std::meta::info get_element_type_reflected(std::meta::info type_reflection) { + if (std::meta::is_array_type(type_reflection)) { + return std::meta::remove_extent(type_reflection); + } + + auto members = std::meta::members_of(type_reflection, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::is_type(mem)) { + auto name = std::meta::identifier_of(mem); + if (name == "value_type") { + return mem; + } + } + } + return ^^void; } - auto result = numberparsing::parse_double(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("double"); + +private: + // Check if type has member with given name + template + static consteval bool has_member(std::string_view member_name) { + constexpr auto members = std::meta::nonstatic_data_members_of(^^Type, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::identifier_of(mem) == member_name) { + return true; + } + } + return false; } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("double"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); - return NUMBER_ERROR; + // Get type of member by name + template + static consteval auto get_member_type(std::string_view member_name) { + constexpr auto members = std::meta::nonstatic_data_members_of(^^Type, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::identifier_of(mem) == member_name) { + return std::meta::type_of(mem); + } + } + return ^^void; } - auto result = numberparsing::parse_double_in_string(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("double"); + + // Check if non-reflected type is array-like + template + static consteval bool is_container_type() { + using BaseType = std::remove_cvref_t; + if constexpr (requires { typename BaseType::value_type; }) { + return true; + } + if constexpr (std::is_array_v) { + return true; + } + return false; } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("bool"); - // We have a boolean if we have either "true" or "false" and the next character is either - // a structural character or whitespace. We also check that the length is correct: - // "true" and "false" are 4 and 5 characters long, respectively. - bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && - (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); - bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && - (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); - if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("bool"); - return value_true; -} + // Extract element type from non-reflected container + template + using extract_element_type = std::conditional_t< + requires { typename std::remove_cvref_t::value_type; }, + typename std::remove_cvref_t::value_type, + std::conditional_t< + std::is_array_v>, + std::remove_extent_t>, + void + > + >; + + // Validate path matches struct definition + static consteval bool validate_path() { + if constexpr (!std::is_class_v) { + return true; + } -simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("null"); - bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && - (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); - if(result) { // we have something that looks like a null. - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("null"); - } else if (json[0] == 'n') { - return incorrect_type_error("Not a null but starts with n"); - } - return result; -} + auto current_type = ^^T; + std::size_t i = parser.skip_root(); -simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); + while (i < path_view.size()) { + if (path_view[i] == '.') { + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != '.' && path_view[i] != '[') { + ++i; + } - return _json_iter->skip_child(depth()); -} + std::string_view field_name = path_view.substr(field_start, i - field_start); -simdjson_inline value_iterator value_iterator::child() const noexcept { - assert_at_child(); - return { _json_iter, depth()+1, _json_iter->token.position() }; -} + bool found = false; + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); -// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller -// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is -// marked non-inline. -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_inline bool value_iterator::is_open() const noexcept { - return _json_iter->depth() >= depth(); -} -SIMDJSON_POP_DISABLE_WARNINGS + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + found = true; + break; + } + } -simdjson_inline bool value_iterator::at_end() const noexcept { - return _json_iter->at_end(); -} + if (!found) { + return false; + } -simdjson_inline bool value_iterator::at_start() const noexcept { - return _json_iter->token.position() == start_position(); -} + } else if (path_view[i] == '[') { + ++i; + if (i >= path_view.size()) return false; -simdjson_inline bool value_iterator::at_first_field() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - return _json_iter->token.position() == start_position() + 1; -} + if (path_view[i] == '"' || path_view[i] == '\'') { + char quote = path_view[i]; + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != quote) { + ++i; + } -simdjson_inline void value_iterator::abandon() noexcept { - _json_iter->abandon(); -} + std::string_view field_name = path_view.substr(field_start, i - field_start); + if (i < path_view.size()) ++i; + if (i < path_view.size() && path_view[i] == ']') ++i; -simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { - return _depth; -} -simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { - return _json_iter->error; -} -simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { - return _json_iter->string_buf_loc(); -} -simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { - return *_json_iter; -} -simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { - return *_json_iter; -} + bool found = false; + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); -simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { - return _json_iter->peek(start_position()); -} -simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { - return _json_iter->peek_length(start_position()); + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + found = true; + break; + } + } + + if (!found) { + return false; + } + + } else { + while (i < path_view.size() && path_view[i] >= '0' && path_view[i] <= '9') { + ++i; + } + + if (i < path_view.size() && path_view[i] == ']') ++i; + + if (!is_array_like_reflected(current_type)) { + return false; + } + + auto new_type = get_element_type_reflected(current_type); + + if (new_type == ^^void) { + return false; + } + + current_type = new_type; + } + } else { + ++i; + } + } + + return true; + } +}; + +// Compile-time path accessor with validation +template +inline simdjson_result<::simdjson::ppc64::ondemand::value> at_path_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = path_accessor; + return accessor::access(doc_or_val); } -simdjson_inline uint32_t value_iterator::peek_root_length() const noexcept { - return _json_iter->peek_root_length(start_position()); + +// Overload without type parameter (no validation) +template +inline simdjson_result<::simdjson::ppc64::ondemand::value> at_path_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = path_accessor; + return accessor::access(doc_or_val); } -simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - // If we're not at the position anymore, we don't want to advance the cursor. - if (!is_at_start()) { return peek_start(); } +// ============================================================================ +// JSON Pointer Compile-Time Support (RFC 6901) +// ============================================================================ - // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. - assert_at_start(); - return _json_iter->peek(); -} +// JSON Pointer parser: /field/0/nested (slash-separated) +template +struct json_pointer_parser { + static constexpr std::string_view pointer_str = Pointer.view(); -simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - // If we're not at the position anymore, we don't want to advance the cursor. - if (!is_at_start()) { return; } + // Unescape token: ~0 -> ~, ~1 -> / + static consteval void unescape_token(std::string_view src, char* dest, std::size_t& out_len) { + out_len = 0; + for (std::size_t i = 0; i < src.size(); ++i) { + if (src[i] == '~' && i + 1 < src.size()) { + if (src[i + 1] == '0') { + dest[out_len++] = '~'; + ++i; + } else if (src[i + 1] == '1') { + dest[out_len++] = '/'; + ++i; + } else { + dest[out_len++] = src[i]; + } + } else { + dest[out_len++] = src[i]; + } + } + } - // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. - assert_at_start(); - _json_iter->return_current_and_advance(); - _json_iter->ascend_to(depth()-1); -} + // Check if token is numeric + static consteval bool is_numeric(std::string_view token) { + if (token.empty()) return false; + if (token[0] == '0' && token.size() > 1) return false; + for (char c : token) { + if (c < '0' || c > '9') return false; + } + return true; + } -simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { - logger::log_start_value(*_json_iter, start_position(), depth(), type); - // If we're not at the position anymore, we don't want to advance the cursor. - const uint8_t *json; - if (!is_at_start()) { -#if SIMDJSON_DEVELOPMENT_CHECKS - if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } -#endif - json = peek_start(); - if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } - } else { - assert_at_start(); - /** - * We should be prudent. Let us peek. If it is not the right type, we - * return an error. Only once we have determined that we have the right - * type are we allowed to advance! - */ - json = _json_iter->peek(); - if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } - _json_iter->return_current_and_advance(); + // Parse numeric token to index + static consteval std::size_t parse_index(std::string_view token) { + std::size_t result = 0; + for (char c : token) { + result = result * 10 + (c - '0'); + } + return result; } + // Count tokens in pointer + static consteval std::size_t count_tokens() { + if (pointer_str.empty() || pointer_str == "/") return 0; - return SUCCESS; -} + std::size_t count = 0; + std::size_t pos = pointer_str[0] == '/' ? 1 : 0; + while (pos < pointer_str.size()) { + ++count; + std::size_t next_slash = pointer_str.find('/', pos); + if (next_slash == std::string_view::npos) break; + pos = next_slash + 1; + } -simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return peek_start(); } + return count; + } - assert_at_root(); - return _json_iter->peek(); -} -simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return peek_start(); } + // Get Nth token + static consteval std::string_view get_token(std::size_t token_index) { + std::size_t pos = pointer_str[0] == '/' ? 1 : 0; + std::size_t current_token = 0; - assert_at_non_root_start(); - return _json_iter->peek(); -} + while (current_token < token_index) { + std::size_t next_slash = pointer_str.find('/', pos); + pos = next_slash + 1; + ++current_token; + } -simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return; } + std::size_t token_end = pointer_str.find('/', pos); + if (token_end == std::string_view::npos) token_end = pointer_str.size(); - assert_at_root(); - _json_iter->return_current_and_advance(); - _json_iter->ascend_to(depth()-1); -} -simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return; } + return pointer_str.substr(pos, token_end - pos); + } +}; - assert_at_non_root_start(); - _json_iter->return_current_and_advance(); - _json_iter->ascend_to(depth()-1); -} +// JSON Pointer accessor +template +struct pointer_accessor { + using parser = json_pointer_parser; + static constexpr std::string_view pointer_view = Pointer.view(); + static constexpr std::size_t token_count = parser::count_tokens(); -simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { - logger::log_error(*_json_iter, start_position(), depth(), message); - return INCORRECT_TYPE; -} + // Validate pointer against struct definition + static consteval bool validate_pointer() { + if constexpr (!std::is_class_v) { + return true; + } -simdjson_inline bool value_iterator::is_at_start() const noexcept { - return position() == start_position(); -} + auto current_type = ^^T; + std::size_t pos = pointer_view[0] == '/' ? 1 : 0; -simdjson_inline bool value_iterator::is_at_key() const noexcept { - // Keys are at the same depth as the object. - // Note here that we could be safer and check that we are within an object, - // but we do not. - return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; -} + while (pos < pointer_view.size()) { + // Extract token up to next / + std::size_t token_end = pointer_view.find('/', pos); + if (token_end == std::string_view::npos) token_end = pointer_view.size(); -simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { - // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). - auto delta = position() - start_position(); - return delta == 1 || delta == 2; -} + std::string_view token = pointer_view.substr(pos, token_end - pos); -inline void value_iterator::assert_at_start() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); - SIMDJSON_ASSUME( _depth > 0 ); -} + if (parser::is_numeric(token)) { + if (!path_accessor::is_array_like_reflected(current_type)) { + return false; + } + current_type = path_accessor::get_element_type_reflected(current_type); + } else { + bool found = false; + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == token) { + current_type = std::meta::type_of(mem); + found = true; + break; + } + } -inline void value_iterator::assert_at_container_start() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); - SIMDJSON_ASSUME( _depth > 0 ); -} + if (!found) return false; + } -inline void value_iterator::assert_at_next() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); - SIMDJSON_ASSUME( _depth > 0 ); -} + pos = token_end + 1; + } -simdjson_inline void value_iterator::move_at_start() noexcept { - _json_iter->_depth = _depth; - _json_iter->token.set_position(_start_position); -} + return true; + } -simdjson_inline void value_iterator::move_at_container_start() noexcept { - _json_iter->_depth = _depth; - _json_iter->token.set_position(_start_position + 1); -} + // Recursive accessor + template + static inline simdjson_result access_impl(simdjson_result current) noexcept { + if constexpr (TokenIndex >= token_count) { + return current; + } else { + constexpr std::string_view token = parser::get_token(TokenIndex); -simdjson_inline simdjson_result value_iterator::reset_array() noexcept { - if(error()) { return error(); } - move_at_container_start(); - return started_array(); -} + if constexpr (parser::is_numeric(token)) { + constexpr std::size_t index = parser::parse_index(token); + auto arr = current.get_array().value_unsafe(); + auto next_value = arr.at(index); + return access_impl(next_value); + } else { + auto obj = current.get_object().value_unsafe(); + auto next_value = obj.find_field_unordered(token); + return access_impl(next_value); + } + } + } -simdjson_inline simdjson_result value_iterator::reset_object() noexcept { - if(error()) { return error(); } - move_at_container_start(); - return started_object(); -} + // Access JSON value at pointer + template + static inline simdjson_result access(DocOrValue& doc_or_val) noexcept { + if constexpr (std::is_class_v) { + constexpr bool pointer_valid = validate_pointer(); + static_assert(pointer_valid, "JSON Pointer does not match struct definition"); + } -inline void value_iterator::assert_at_child() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); - SIMDJSON_ASSUME( _depth > 0 ); -} + if (pointer_view.empty() || pointer_view == "/") { + if constexpr (requires { doc_or_val.get_value(); }) { + return doc_or_val.get_value(); + } else { + return doc_or_val; + } + } -inline void value_iterator::assert_at_root() const noexcept { - assert_at_start(); - SIMDJSON_ASSUME( _depth == 1 ); -} + simdjson_result current = doc_or_val.get_value(); + return access_impl<0>(current); + } -inline void value_iterator::assert_at_non_root_start() const noexcept { - assert_at_start(); - SIMDJSON_ASSUME( _depth > 1 ); -} + // Extract value at pointer directly into target with type validation + template + static inline error_code extract_field(DocOrValue& doc_or_val, FieldType& target) noexcept { + static_assert(std::is_class_v, "extract_field requires T to be a struct type for validation"); -inline void value_iterator::assert_is_valid() const noexcept { - SIMDJSON_ASSUME( _json_iter != nullptr ); -} + constexpr bool pointer_valid = validate_pointer(); + static_assert(pointer_valid, "JSON Pointer does not match struct definition"); -simdjson_inline bool value_iterator::is_valid() const noexcept { - return _json_iter != nullptr; -} + constexpr auto final_type = get_final_type(); + static_assert(final_type == ^^FieldType, "Target type does not match the field type at the pointer"); -simdjson_inline simdjson_result value_iterator::type() const noexcept { - switch (*peek_start()) { - case '{': - return json_type::object; - case '[': - return json_type::array; - case '"': - return json_type::string; - case 'n': - return json_type::null; - case 't': case 'f': - return json_type::boolean; - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return json_type::number; - default: - return TAPE_ERROR; + simdjson_result current_value = doc_or_val.get_value(); + auto json_value = access_impl<0>(current_value); + if (json_value.error()) return json_value.error(); + + return json_value.get(target); } -} -simdjson_inline token_position value_iterator::start_position() const noexcept { - return _start_position; -} +private: + // Get final type by walking pointer through struct + template + static consteval std::enable_if_t, std::meta::info> get_final_type() { + auto current_type = ^^T; + std::size_t pos = pointer_view[0] == '/' ? 1 : 0; -simdjson_inline token_position value_iterator::position() const noexcept { - return _json_iter->position(); -} + while (pos < pointer_view.size()) { + std::size_t token_end = pointer_view.find('/', pos); + if (token_end == std::string_view::npos) token_end = pointer_view.size(); -simdjson_inline token_position value_iterator::end_position() const noexcept { - return _json_iter->end_position(); -} + std::string_view token = pointer_view.substr(pos, token_end - pos); -simdjson_inline token_position value_iterator::last_position() const noexcept { - return _json_iter->last_position(); + if (parser::is_numeric(token)) { + current_type = path_accessor::get_element_type_reflected(current_type); + } else { + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == token) { + current_type = std::meta::type_of(mem); + break; + } + } + } + + pos = token_end + 1; + } + + return current_type; + } +}; + +// Compile-time JSON Pointer accessor with validation +template +inline simdjson_result<::simdjson::ppc64::ondemand::value> at_pointer_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = pointer_accessor; + return accessor::access(doc_or_val); } -simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { - return _json_iter->report_error(error, message); +// Overload without type parameter (no validation) +template +inline simdjson_result<::simdjson::ppc64::ondemand::value> at_pointer_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = pointer_accessor; + return accessor::access(doc_or_val); } +} // namespace json_path } // namespace ondemand } // namespace ppc64 } // namespace simdjson -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::value_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/value_iterator-inl.h for ppc64 */ +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION +#endif // SIMDJSON_GENERIC_ONDEMAND_COMPILE_TIME_ACCESSORS_H +/* end file simdjson/generic/ondemand/compile_time_accessors.h for ppc64 */ /* end file simdjson/generic/ondemand/amalgamated.h for ppc64 */ /* including simdjson/ppc64/end.h: #include "simdjson/ppc64/end.h" */ @@ -89578,7 +109089,7 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return bs_bits != 0; } @@ -89604,6 +109115,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + uint64_t((is_backslash | is_quote | is_control).to_bitmask()) + }; +} + } // unnamed namespace } // namespace westmere } // namespace simdjson @@ -89670,7 +109206,7 @@ class value_iterator; /* end file simdjson/generic/ondemand/base.h for westmere */ /* including simdjson/generic/ondemand/deserialize.h for westmere: #include "simdjson/generic/ondemand/deserialize.h" */ /* begin file simdjson/generic/ondemand/deserialize.h for westmere */ -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS #ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -89679,55 +109215,8 @@ class value_iterator; /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include namespace simdjson { -namespace tag_invoke_fn_ns { -void tag_invoke(); - -struct tag_invoke_fn { - template - requires requires(Tag tag, Args &&...args) { - tag_invoke(std::forward(tag), std::forward(args)...); - } - constexpr auto operator()(Tag tag, Args &&...args) const - noexcept(noexcept(tag_invoke(std::forward(tag), - std::forward(args)...))) - -> decltype(tag_invoke(std::forward(tag), - std::forward(args)...)) { - return tag_invoke(std::forward(tag), std::forward(args)...); - } -}; -} // namespace tag_invoke_fn_ns - -inline namespace tag_invoke_ns { -inline constexpr tag_invoke_fn_ns::tag_invoke_fn tag_invoke = {}; -} // namespace tag_invoke_ns - -template -concept tag_invocable = requires(Tag tag, Args... args) { - tag_invoke(std::forward(tag), std::forward(args)...); -}; - -template -concept nothrow_tag_invocable = - tag_invocable && requires(Tag tag, Args... args) { - { - tag_invoke(std::forward(tag), std::forward(args)...) - } noexcept; - }; - -template -using tag_invoke_result = - std::invoke_result; - -template -using tag_invoke_result_t = - std::invoke_result_t; - -template using tag_t = std::decay_t; - - struct deserialize_tag; /// These types are deserializable in a built-in way @@ -89749,7 +109238,7 @@ template concept custom_deserializable = tag_invocable; template -concept deserializable = custom_deserializable || is_builtin_deserializable_v; +concept deserializable = custom_deserializable || is_builtin_deserializable_v || concepts::optional_type; template concept nothrow_custom_deserializable = nothrow_tag_invocable; @@ -89760,28 +109249,44 @@ concept nothrow_deserializable = nothrow_custom_deserializable || is_bu /// Deserialize Tag inline constexpr struct deserialize_tag { + using array_type = westmere::ondemand::array; + using object_type = westmere::ondemand::object; using value_type = westmere::ondemand::value; using document_type = westmere::ondemand::document; using document_reference_type = westmere::ondemand::document_reference; + // Customization Point for array + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(array_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for object + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(object_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + // Customization Point for value template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } // Customization Point for document template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } // Customization Point for document reference template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } @@ -89791,7 +109296,7 @@ inline constexpr struct deserialize_tag { } // namespace simdjson #endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS /* end file simdjson/generic/ondemand/deserialize.h for westmere */ /* including simdjson/generic/ondemand/value_iterator.h for westmere: #include "simdjson/generic/ondemand/value_iterator.h" */ @@ -90133,7 +109638,7 @@ class value_iterator { simdjson_warn_unused simdjson_inline simdjson_result get_root_number(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result is_root_null(bool check_trailing) noexcept; - simdjson_inline error_code error() const noexcept; + simdjson_warn_unused simdjson_inline error_code error() const noexcept; simdjson_inline uint8_t *&string_buf_loc() noexcept; simdjson_inline const json_iterator &json_iter() const noexcept; simdjson_inline json_iterator &json_iter() noexcept; @@ -90217,8 +109722,8 @@ class value_iterator { simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; - simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; - simdjson_inline error_code end_container() noexcept; + simdjson_warn_unused simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container() noexcept; /** * Advance to a place expecting a value (increasing depth). @@ -90228,8 +109733,8 @@ class value_iterator { */ simdjson_inline simdjson_result advance_to_value() noexcept; - simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; - simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; + simdjson_warn_unused simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; + simdjson_warn_unused simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; simdjson_inline bool is_at_start() const noexcept; /** @@ -90266,7 +109771,7 @@ class value_iterator { /** @copydoc error_code json_iterator::end_position() const noexcept; */ simdjson_inline token_position end_position() const noexcept; /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ - simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + simdjson_warn_unused simdjson_inline error_code report_error(error_code error, const char *message) noexcept; friend class document; friend class object; @@ -90303,6 +109808,7 @@ struct simdjson_result : public westmere::im /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include @@ -90331,13 +109837,14 @@ class value { * * You may use get_double(), get_bool(), get_uint64(), get_int64(), * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * When SIMDJSON_SUPPORTS_CONCEPTS is set, custom types are also supported. * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ template simdjson_inline simdjson_result get() -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -90354,22 +109861,38 @@ class value { * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * If the macro SIMDJSON_SUPPORTS_CONCEPTS is set, then custom types are also supported. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template - simdjson_inline error_code get(T &out) -#if SIMDJSON_SUPPORTS_DESERIALIZATION + simdjson_warn_unused simdjson_inline error_code get(T &out) +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { - #if SIMDJSON_SUPPORTS_DESERIALIZATION + #if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); + } else if constexpr (concepts::optional_type) { + using value_type = typename std::remove_cvref_t::value_type; + + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullopt + return SUCCESS; + } + + if (!out) { + out.emplace(); + } + return get(out.value()); } else { static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " "And you do not seem to have added support for it. Indeed, we have that " @@ -90379,7 +109902,7 @@ class value { static_cast(out); // to get rid of unused errors return UNINITIALIZED; } -#else // SIMDJSON_SUPPORTS_DESERIALIZATION +#else // SIMDJSON_SUPPORTS_CONCEPTS // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -90497,7 +110020,7 @@ class value { * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. */ template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; /** * Cast this JSON value to a "wobbly" string. @@ -90674,12 +110197,14 @@ class value { */ simdjson_inline simdjson_result at(size_t index) noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -90708,7 +110233,8 @@ class value { * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the default because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -90951,6 +110477,14 @@ class value { */ simdjson_inline simdjson_result at_path(std::string_view at_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * Supports wildcard character (*) for arrays or ".*" for objects. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern + */ + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; protected: /** @@ -91018,7 +110552,7 @@ struct simdjson_result : public westmere::implementat simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -91047,12 +110581,14 @@ struct simdjson_result : public westmere::implementat simdjson_inline simdjson_result end() & noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -91079,7 +110615,8 @@ struct simdjson_result : public westmere::implementat * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the defaul because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -91099,7 +110636,22 @@ struct simdjson_result : public westmere::implementat simdjson_result operator[](int) noexcept = delete; /** - * Get the type of this JSON value. + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * Given a valid JSON document, the answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just @@ -91123,6 +110675,7 @@ struct simdjson_result : public westmere::implementat simdjson_inline simdjson_result current_depth() const noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; }; } // namespace simdjson @@ -91593,14 +111146,14 @@ class json_iterator { * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ - simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + simdjson_warn_unused simdjson_inline error_code report_error(error_code error, const char *message) noexcept; /** * Log error, but don't stop iteration. * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ - simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; + simdjson_warn_unused simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; /** * Take an input in json containing max_len characters and attempt to copy it over to tmpbuf, a buffer with @@ -91620,7 +111173,7 @@ class json_iterator { simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; - simdjson_inline error_code consume_character(char c) noexcept; + simdjson_warn_unused simdjson_inline error_code consume_character(char c) noexcept; #if SIMDJSON_DEVELOPMENT_CHECKS simdjson_inline token_position start_position(depth_t depth) const noexcept; simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; @@ -91711,6 +111264,7 @@ namespace ondemand { * The type of a JSON value. */ enum class json_type { + unknown=0, // Start at 1 to catch uninitialized / default values more easily array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) object, ///< A JSON object ( { "a": 1, "b" 2, ... } ) @@ -91917,6 +111471,12 @@ class raw_json_string { */ simdjson_inline const char * raw() const noexcept; + /** + * Get the character at index i. This is unchecked. + * [0] when the string is of length 0 returns the final quote ("). + */ + simdjson_inline char operator[](size_t i) const noexcept; + /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done) on target.size() characters, @@ -92056,10 +111616,10 @@ struct simdjson_result : public westmere::i simdjson_inline ~simdjson_result() noexcept = default; ///< @private simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline char operator[](size_t) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape(westmere::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(westmere::ondemand::json_iterator &iter) const noexcept; }; - } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H @@ -92075,6 +111635,7 @@ struct simdjson_result : public westmere::i /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include +#include namespace simdjson { namespace westmere { @@ -92193,7 +111754,9 @@ class parser { simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; - /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + /** @overload simdjson_result iterate(padded_string_view json) & noexcept + The string instance might be have its capacity extended. Note that this can still + result in AddressSanitizer: container-overflow in some cases. */ simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; @@ -92281,6 +111844,11 @@ class parser { * Setting batch_size to excessively large or excessively small values may impact negatively the * performance. * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what @@ -92288,10 +111856,10 @@ class parser { * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * - * ### Threads + * This is checked automatically with all iterate_many function calls, except for the two + * that take pointers (const char* or const uint8_t*). * - * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the - * hood to do some lookahead. + * ### Threads * * ### Parser Capacity * @@ -92317,14 +111885,16 @@ class parser { */ inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(padded_string_view json, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) + the string might be automatically padded with up to SIMDJSON_PADDING whitespace characters */ + inline simdjson_result iterate_many(std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe - /** @private We do not want to allow implicit conversion from C string to std::string. */ simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; @@ -92426,13 +111996,39 @@ class parser { bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; #endif + /** + * Get a unique parser instance corresponding to the current thread. + * This instance can be safely used within the current thread, but it should + * not be passed to other threads. + * + * A parser should only be used for one document at a time. + * + * Our simdjson::from functions use this parser instance. + * + * You can free the related parser by calling release_parser(). + */ + static simdjson_inline simdjson_warn_unused ondemand::parser& get_parser(); + /** + * Release the parser instance initialized by get_parser() and all the + * associated resources (memory). Returns true if a parser instance + * was released. + */ + static simdjson_inline bool release_parser(); + private: + friend bool release_parser(); + friend ondemand::parser& get_parser(); + /** Get the thread-local parser instance, allocates it if needed */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_parser_instance(); + /** Get the thread-local parser instance, it might be null */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_threadlocal_parser_if_exists(); /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; std::unique_ptr string_buf{}; + #if SIMDJSON_DEVELOPMENT_CHECKS std::unique_ptr start_positions{}; #endif @@ -92460,6 +112056,314 @@ struct simdjson_result : public westmere::implementa #endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H /* end file simdjson/generic/ondemand/parser.h for westmere */ +// JSON builder - needed for extract_into functionality +/* including simdjson/generic/ondemand/json_string_builder.h for westmere: #include "simdjson/generic/ondemand/json_string_builder.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder.h for westmere */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + + +#if SIMDJSON_SUPPORTS_CONCEPTS + +namespace westmere { +namespace builder { + class string_builder; +}} + +template +struct has_custom_serialization : std::false_type {}; + +inline constexpr struct serialize_tag { + template + constexpr void operator()(westmere::builder::string_builder& b, T&& obj) const{ + return tag_invoke(*this, b, std::forward(obj)); + } + + +} serialize{}; +template +struct has_custom_serialization(), std::declval())) +>> : std::true_type {}; + +template +constexpr bool require_custom_serialization = has_custom_serialization::value; +#else +struct has_custom_serialization : std::false_type {}; +#endif // SIMDJSON_SUPPORTS_CONCEPTS + +namespace westmere { +namespace builder { +/** + * A builder for JSON strings representing documents. This is a low-level + * builder that is not meant to be used directly by end-users. Though it + * supports atomic types (Booleans, strings), it does not support composed + * types (arrays and objects). + * + * Ultimately, this class can support kernel-specific optimizations. E.g., + * it may make use of SIMD instructions to escape strings faster. + */ +class string_builder { +public: + simdjson_inline string_builder(size_t initial_capacity = DEFAULT_INITIAL_CAPACITY); + + static constexpr size_t DEFAULT_INITIAL_CAPACITY = 1024; + + /** + * Append number (includes Booleans). Booleans are mapped to the strings + * false and true. Numbers are converted to strings abiding by the JSON standard. + * Floating-point numbers are converted to the shortest string that 'correctly' + * represents the number. + */ + template::value>::type> + simdjson_inline void append(number_type v) noexcept; + + /** + * Append character c. + */ + simdjson_inline void append(char c) noexcept; + + /** + * Append the string 'null'. + */ + simdjson_inline void append_null() noexcept; + + /** + * Clear the content. + */ + simdjson_inline void clear() noexcept; + + /** + * Append the std::string_view, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append(std::string_view input) noexcept; + + /** + * Append the std::string_view surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(std::string_view input) noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + template + simdjson_inline void escape_and_append_with_quotes() noexcept; +#endif + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(char input) noexcept; + + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(const char* input) noexcept; + + /** + * Append the C string directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *c) noexcept; + + /** + * Append "{" to the buffer. + */ + simdjson_inline void start_object() noexcept; + + /** + * Append "}" to the buffer. + */ + simdjson_inline void end_object() noexcept; + + /** + * Append "[" to the buffer. + */ + simdjson_inline void start_array() noexcept; + + /** + * Append "]" to the buffer. + */ + simdjson_inline void end_array() noexcept; + + /** + * Append "," to the buffer. + */ + simdjson_inline void append_comma() noexcept; + + /** + * Append ":" to the buffer. + */ + simdjson_inline void append_colon() noexcept; + + /** + * Append a key-value pair to the buffer. + * The key is escaped and surrounded by double quotes. + * The value is escaped if it is a string. + */ + template + simdjson_inline void append_key_value(key_type key, value_type value) noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + template + simdjson_inline void append_key_value(value_type value) noexcept; + + // Support for optional types (std::optional, etc.) + template + requires(!require_custom_serialization) + simdjson_inline void append(const T &opt); + + template + requires(require_custom_serialization) + simdjson_inline void append(T &&val); + + // Support for string-like types + template + requires(std::is_convertible::value || + std::is_same::value ) + simdjson_inline void append(const T &value); +#endif +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS + // Support for range-based appending (std::ranges::view, etc.) + template +requires (!std::is_convertible::value && !require_custom_serialization) + simdjson_inline void append(const R &range) noexcept; +#endif + /** + * Append the std::string_view directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(std::string_view input) noexcept; + + /** + * Append len characters from str. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *str, size_t len) noexcept; +#if SIMDJSON_EXCEPTIONS + /** + * Creates an std::string from the written JSON buffer. + * Throws if memory allocation failed + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string() const noexcept(false); + + /** + * Creates an std::string_view from the written JSON buffer. + * Throws if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string_view() const noexcept(false) simdjson_lifetime_bound; +#endif + + /** + * Returns a view on the written JSON buffer. Returns an error + * if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result view() const noexcept; + + /** + * Appends the null character to the buffer and returns + * a pointer to the beginning of the written JSON buffer. + * Returns an error if memory allocation failed. + * The result is null-terminated. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result c_str() noexcept; + + /** + * Return true if the content is valid UTF-8. + */ + simdjson_inline bool validate_unicode() const noexcept; + + /** + * Returns the current size of the written JSON buffer. + * If an error occurred, returns 0. + */ + simdjson_inline size_t size() const noexcept; + +private: + /** + * Returns true if we can write at least upcoming_bytes bytes. + * The underlying buffer is reallocated if needed. It is designed + * to be called before writing to the buffer. It should be fast. + */ + simdjson_inline bool capacity_check(size_t upcoming_bytes); + + /** + * Grow the buffer to at least desired_capacity bytes. + * If the allocation fails, is_valid is set to false. We expect + * that this function would not be repeatedly called. + */ + simdjson_inline void grow_buffer(size_t desired_capacity); + + /** + * We use this helper function to make sure that is_valid is kept consistent. + */ + simdjson_inline void set_valid(bool valid) noexcept; + + std::unique_ptr buffer{}; + size_t position{0}; + size_t capacity{0}; + bool is_valid{true}; +}; + + + +} +} + + +#if !SIMDJSON_STATIC_REFLECTION +// fallback implementation until we have static reflection +template +simdjson_warn_unused simdjson_result to_json(const Z &z, size_t initial_capacity = simdjson::westmere::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + simdjson::westmere::builder::string_builder b(initial_capacity); + b.append(z); + std::string_view s; + auto e = b.view().get(s); + if(e) { return e; } + return std::string(s); +} +template +simdjson_warn_unused simdjson_error to_json(const Z &z, std::string &s, size_t initial_capacity = simdjson::westmere::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + simdjson::westmere::builder::string_builder b(initial_capacity); + b.append(z); + std::string_view sv; + auto e = b.view().get(sv); + if(e) { return e; } + s.assign(sv.data(), sv.size()); + return simdjson::SUCCESS; +} +#endif + +#if SIMDJSON_SUPPORTS_CONCEPTS +#endif // SIMDJSON_SUPPORTS_CONCEPTS + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_STRING_BUILDER_H +/* end file simdjson/generic/ondemand/json_string_builder.h for westmere */ + // All other declarations /* including simdjson/generic/ondemand/array.h for westmere: #include "simdjson/generic/ondemand/array.h" */ /* begin file simdjson/generic/ondemand/array.h for westmere */ @@ -92470,6 +112374,7 @@ struct simdjson_result : public westmere::implementa /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { @@ -92572,7 +112477,7 @@ class array { * JSONPath queries that trivially convertible to JSON Pointer queries: key * names and array indices. * - * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * https://www.rfc-editor.org/rfc/rfc9535 (RFC 9535) * * @return The value associated with the given JSONPath expression, or: * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails @@ -92582,6 +112487,15 @@ class array { */ inline simdjson_result at_path(std::string_view json_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * Supports wildcard patterns like "[*]" to match all array elements. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern + */ + inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; + /** * Consumes the array and returns a string_view instance corresponding to the * array as represented in JSON. It points inside the original document. @@ -92596,11 +112510,42 @@ class array { * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) noexcept; + +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this array as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON array is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_warn_unused simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the array, no matter where you are right now. */ - simdjson_inline error_code consume() noexcept; + simdjson_warn_unused simdjson_inline error_code consume() noexcept; /** * Begin array iteration. @@ -92673,8 +112618,30 @@ struct simdjson_result : public westmere::implementat simdjson_inline simdjson_result at(size_t index) noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_warn_unused simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -92719,7 +112686,8 @@ class array_iterator { * * Part of the std::iterator interface. */ - simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result + operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. /** * Check if we are at the end of the JSON. * @@ -92743,6 +112711,11 @@ class array_iterator { */ simdjson_inline array_iterator &operator++() noexcept; + /** + * Check if the array is at the end. + */ + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; + private: value_iterator iter{}; @@ -92761,7 +112734,6 @@ namespace simdjson { template<> struct simdjson_result : public westmere::implementation_simdjson_result_base { -public: simdjson_inline simdjson_result(westmere::ondemand::array_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -92774,6 +112746,8 @@ struct simdjson_result : public westmere::im simdjson_inline bool operator==(const simdjson_result &) const noexcept; simdjson_inline bool operator!=(const simdjson_result &) const noexcept; simdjson_inline simdjson_result &operator++() noexcept; + + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; }; } // namespace simdjson @@ -92790,6 +112764,7 @@ struct simdjson_result : public westmere::im /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ @@ -92901,7 +112876,7 @@ class document { * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. */ template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; /** * Cast this JSON value to a string. * @@ -92967,7 +112942,7 @@ class document { */ template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -92990,7 +112965,7 @@ class document { */ template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -93008,18 +112983,18 @@ class document { * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. - * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns INCORRECT_TYPE If the JSON value is of the given type. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template - simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION + simdjson_warn_unused simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { @@ -93031,7 +113006,7 @@ class document { static_cast(out); // to get rid of unused errors return UNINITIALIZED; } -#else // SIMDJSON_SUPPORTS_DESERIALIZATION +#else // SIMDJSON_SUPPORTS_CONCEPTS // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -93041,7 +113016,7 @@ class document { " You may also add support for custom types, see our documentation."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS } /** @overload template error_code get(T &out) & noexcept */ @@ -93106,7 +113081,7 @@ class document { * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a raw_json_string. * @@ -93115,7 +113090,7 @@ class document { * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a bool. * @@ -93186,12 +113161,14 @@ class document { simdjson_inline simdjson_result end() & noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -93230,7 +113207,8 @@ class document { * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the default because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -93265,11 +113243,27 @@ class document { * E.g., you must still call "is_null()" to check that a value is null even if * "type()" returns json_type::null. * + * The answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. + * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). * - * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + * Prior to simdjson 4.0, this function would return an error given a bad token. + * Starting with simdjson 4.0, it will return simdjson::ondemand::json_type::unknown. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. */ simdjson_inline simdjson_result type() noexcept; @@ -93469,7 +113463,7 @@ class document { * JSONPath queries that trivially convertible to JSON Pointer queries: key * names and array indices. * - * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * https://www.rfc-editor.org/rfc/rfc9535 (RFC 9535) * * Key values are matched exactly, without unescaping or Unicode normalization. * We do a byte-by-byte comparison. E.g. @@ -93487,17 +113481,64 @@ class document { */ simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * + * Supports wildcard patterns like "$.array[*]" or "$.object.*" to match multiple elements. + * + * This method materializes all matching values into a vector. + * The document will be consumed after this call. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern, or: + * - INVALID_JSON_POINTER if the JSONPath cannot be parsed + * - NO_SUCH_FIELD if a field does not exist + * - INDEX_OUT_OF_BOUNDS if an array index is out of bounds + * - INCORRECT_TYPE if path traversal encounters wrong type + */ + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; + /** * Consumes the document and returns a string_view instance corresponding to the * document as represented in JSON. It points inside the original byte array containing * the JSON document. */ simdjson_inline simdjson_result raw_json() noexcept; + +#if SIMDJSON_STATIC_REFLECTION + /** + * Extract only specific fields from the JSON object into a struct. + * + * This allows selective deserialization of only the fields you need, + * potentially improving performance by skipping unwanted fields. + * + * Example: + * ```cpp + * struct Car { + * std::string make; + * std::string model; + * int year; + * double price; + * }; + * + * Car car; + * doc.extract_into<"make", "model">(car); + * // Only 'make' and 'model' fields are extracted from JSON + * ``` + * + * @tparam FieldNames Compile-time string literals specifying which fields to extract + * @param out The output struct to populate with selected fields + * @returns SUCCESS on success, or an error code if a required field is missing or has wrong type + */ + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION protected: /** * Consumes the document. */ - simdjson_inline error_code consume() noexcept; + simdjson_warn_unused simdjson_inline error_code consume() noexcept; simdjson_inline document(ondemand::json_iterator &&iter) noexcept; simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; @@ -93550,7 +113591,7 @@ class document_reference { simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -93559,7 +113600,7 @@ class document_reference { simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -93572,7 +113613,7 @@ class document_reference { } template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -93594,14 +113635,14 @@ class document_reference { * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template - simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION + simdjson_warn_unused simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { @@ -93613,7 +113654,7 @@ class document_reference { static_cast(out); // to get rid of unused errors return UNINITIALIZED; } -#else // SIMDJSON_SUPPORTS_DESERIALIZATION +#else // SIMDJSON_SUPPORTS_CONCEPTS // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -93623,12 +113664,17 @@ class document_reference { " You may also add support for custom types, see our documentation."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS } /** @overload template error_code get(T &out) & noexcept */ template simdjson_inline error_code get(T &out) && noexcept; simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_STATIC_REFLECTION + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION simdjson_inline operator document&() const noexcept; #if SIMDJSON_EXCEPTIONS template @@ -93669,6 +113715,7 @@ class document_reference { simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; private: document *doc{nullptr}; @@ -93697,7 +113744,7 @@ struct simdjson_result : public westmere::implemen simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -93710,6 +113757,9 @@ struct simdjson_result : public westmere::implemen template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS + + using westmere::implementation_simdjson_result_base::operator*; + using westmere::implementation_simdjson_result_base::operator->; template ::value == false>::type> explicit simdjson_inline operator T() noexcept(false); simdjson_inline operator westmere::ondemand::array() & noexcept(false); @@ -93749,6 +113799,12 @@ struct simdjson_result : public westmere::implemen simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; +#if SIMDJSON_STATIC_REFLECTION + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION }; @@ -93775,7 +113831,7 @@ struct simdjson_result : public westmere simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -93826,6 +113882,12 @@ struct simdjson_result : public westmere simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; +#if SIMDJSON_STATIC_REFLECTION + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION }; @@ -93918,7 +113980,7 @@ class document_stream { /** * Construct an uninitialized document_stream. * - * ```c++ + * ```cpp * document_stream docs; * auto error = parser.iterate_many(json).get(docs); * ``` @@ -93968,6 +114030,7 @@ class document_stream { * Default constructor. */ simdjson_inline iterator() noexcept; + simdjson_inline iterator(const iterator &other) noexcept = default; /** * Get the current document (or error). */ @@ -93981,6 +114044,7 @@ class document_stream { * @param other the end iterator to compare to. */ simdjson_inline bool operator!=(const iterator &other) const noexcept; + simdjson_inline bool operator==(const iterator &other) const noexcept; /** * @private * @@ -94024,6 +114088,11 @@ class document_stream { */ inline error_code error() const noexcept; + /** + * Returns whether the iterator is at the end. + */ + inline bool at_end() const noexcept; + private: simdjson_inline iterator(document_stream *s, bool finished) noexcept; /** The document_stream we're iterating through. */ @@ -94035,6 +114104,7 @@ class document_stream { friend class document_stream; friend class json_iterator; }; + using iterator = document_stream::iterator; /** * Start iterating the documents in the stream. @@ -94298,6 +114368,10 @@ struct simdjson_result : public westmere::implementat /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION && SIMDJSON_SUPPORTS_CONCEPTS */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_string_builder.h" // for constevalutil::fixed_string */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { @@ -94319,12 +114393,14 @@ class object { simdjson_inline simdjson_result begin() noexcept; simdjson_inline simdjson_result end() noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -94367,7 +114443,8 @@ class object { * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the default because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -94450,6 +114527,15 @@ class object { */ inline simdjson_result at_path(std::string_view json_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * Supports wildcard patterns like ".*" to match all object fields. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern + */ + inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; + /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you @@ -94495,11 +114581,71 @@ class object { */ simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this object as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON object is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_warn_unused simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } + +#if SIMDJSON_STATIC_REFLECTION + /** + * Extract only specific fields from the JSON object into a struct. + * + * This allows selective deserialization of only the fields you need, + * potentially improving performance by skipping unwanted fields. + * + * Example: + * ```cpp + * struct Car { + * std::string make; + * std::string model; + * int year; + * double price; + * }; + * + * Car car; + * object.extract_into<"make", "model">(car); + * // Only 'make' and 'model' fields are extracted from JSON + * ``` + * + * @tparam FieldNames Compile-time string literals specifying which fields to extract + * @param out The output struct to populate with selected fields + * @returns SUCCESS on success, or an error code if a required field is missing or has wrong type + */ + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the object, no matter where you are right now. */ - simdjson_inline error_code consume() noexcept; + simdjson_warn_unused simdjson_inline error_code consume() noexcept; static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; @@ -94538,12 +114684,43 @@ struct simdjson_result : public westmere::implementa simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; - + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; inline simdjson_result reset() noexcept; inline simdjson_result is_empty() noexcept; inline simdjson_result count_fields() & noexcept; inline simdjson_result raw_json() noexcept; + #if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_warn_unused simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#if SIMDJSON_STATIC_REFLECTION + // TODO: move this code into object-inl.h + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) noexcept { + if (error()) { return error(); } + return first.extract_into(out); + } +#endif // SIMDJSON_STATIC_REFLECTION +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -94672,6 +114849,20 @@ inline simdjson_result to_json_string(simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); + +#if SIMDJSON_STATIC_REFLECTION +/** + * Create a JSON string from any user-defined type using static reflection. + * Only available when SIMDJSON_STATIC_REFLECTION is enabled. + */ +template + requires(!std::same_as && + !std::same_as && + !std::same_as && + !std::same_as) +inline std::string to_json_string(const T& obj); +#endif + } // namespace simdjson /** @@ -94743,28 +114934,30 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result #include +#if SIMDJSON_STATIC_REFLECTION +#include +// #include // for std::define_static_string - header not available yet +#endif namespace simdjson { -template -constexpr bool require_custom_serialization = false; ////////////////////////////// // Number deserialization ////////////////////////////// template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { using limits = std::numeric_limits; @@ -94778,7 +114971,6 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { } template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { double x; SIMDJSON_TRY(val.get_double().get(x)); @@ -94787,7 +114979,6 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { } template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { using limits = std::numeric_limits; @@ -94800,8 +114991,23 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { return SUCCESS; } +////////////////////////////// +// String deserialization +////////////////////////////// + +// just a character! +error_code tag_invoke(deserialize_tag, auto &val, char &out) noexcept { + std::string_view x; + SIMDJSON_TRY(val.get_string().get(x)); + if(x.size() != 1) { + return INCORRECT_TYPE; + } + out = x[0]; + return SUCCESS; +} + +// any string-like type (can be constructed from std::string_view) template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { std::string_view str; SIMDJSON_TRY(val.get_string().get(str)); @@ -94818,7 +115024,6 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothr * doc.get>(). */ template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { using value_type = typename std::remove_cvref_t::value_type; static_assert( @@ -94827,9 +115032,13 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { static_assert( std::is_default_constructible_v, "The specified type inside the container must default constructible."); - westmere::ondemand::array arr; - SIMDJSON_TRY(val.get_array().get(arr)); + if constexpr (std::is_same_v, westmere::ondemand::array>) { + arr = val; + } else { + SIMDJSON_TRY(val.get_array().get(arr)); + } + for (auto v : arr) { if constexpr (concepts::returns_reference) { if (auto const err = v.get().get(concepts::emplace_one(out)); @@ -94860,7 +115069,6 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { * string-keyed types. */ template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { using value_type = typename std::remove_cvref_t::mapped_type; static_assert( @@ -94886,7 +115094,45 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { return SUCCESS; } +template +error_code tag_invoke(deserialize_tag, westmere::ondemand::object &obj, T &out) noexcept { + using value_type = typename std::remove_cvref_t::mapped_type; + + out.clear(); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + + westmere::ondemand::value value_obj; + SIMDJSON_TRY(field.value().get(value_obj)); + + value_type this_value; + SIMDJSON_TRY(value_obj.get(this_value)); + out.emplace(typename T::key_type(key), std::move(this_value)); + } + return SUCCESS; +} + +template +error_code tag_invoke(deserialize_tag, westmere::ondemand::value &val, T &out) noexcept { + westmere::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + +template +error_code tag_invoke(deserialize_tag, westmere::ondemand::document &doc, T &out) noexcept { + westmere::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} +template +error_code tag_invoke(deserialize_tag, westmere::ondemand::document_reference &doc, T &out) noexcept { + westmere::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} /** @@ -94904,7 +115150,6 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { * @return status of the conversion */ template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::element_type, ValT>) { using element_type = typename std::remove_cvref_t::element_type; @@ -94929,17 +115174,17 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser /** * This CPO (Customization Point Object) will help deserialize into optional types. */ -template - requires(!require_custom_serialization) -error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::value_type, ValT>) { +template +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept(nothrow_deserializable::value_type, decltype(val)>) { using value_type = typename std::remove_cvref_t::value_type; - static_assert( - deserializable, - "The specified type inside the unique_ptr must itself be deserializable"); - static_assert( - std::is_default_constructible_v, - "The specified type inside the unique_ptr must default constructible."); + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullopt + return SUCCESS; + } if (!out) { out.emplace(); @@ -94948,10 +115193,329 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser return SUCCESS; } + +#if SIMDJSON_STATIC_REFLECTION + + +template +constexpr bool user_defined_type = (std::is_class_v +&& !std::is_same_v && !std::is_same_v && !concepts::optional_type && +!concepts::appendable_containers); + + +template + requires(user_defined_type && std::is_class_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { + westmere::ondemand::object obj; + if constexpr (std::is_same_v, westmere::ondemand::object>) { + obj = val; + } else { + SIMDJSON_TRY(val.get_object().get(obj)); + } + template for (constexpr auto mem : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + if constexpr (concepts::optional_type) { + // for optional members, it's ok if the key is missing + auto error = obj[key].get(out.[:mem:]); + if (error && error != NO_SUCH_FIELD) { + if(error == NO_SUCH_FIELD) { + out.[:mem:].reset(); + continue; + } + return error; + } + } else { + // for non-optional members, the key must be present + SIMDJSON_TRY(obj[key].get(out.[:mem:])); + } + } + }; + return simdjson::SUCCESS; +} + +// Support for enum deserialization - deserialize from string representation using expand approach from P2996R12 +template + requires(std::is_enum_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { +#if SIMDJSON_STATIC_REFLECTION + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + if (str == std::meta::identifier_of(enum_val)) { + out = [:enum_val:]; + return SUCCESS; + } + }; + + return INCORRECT_TYPE; +#else + // Fallback: deserialize as integer if reflection not available + std::underlying_type_t int_val; + SIMDJSON_TRY(val.get(int_val)); + out = static_cast(int_val); + return SUCCESS; +#endif +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::unique_ptr &out) noexcept { + if (!out) { + out = std::make_unique(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::shared_ptr &out) noexcept { + if (!out) { + out = std::make_shared(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +#endif // SIMDJSON_STATIC_REFLECTION + +//////////////////////////////////////// +// Unique pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Shared pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Explicit optional specializations +//////////////////////////////////////// + +//////////////////////////////////////// +// Explicit smart pointer specializations for string and int types +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_shared(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + int64_t temp; + SIMDJSON_TRY(val.get_int64().get(temp)); + *out = static_cast(temp); + return SUCCESS; +} + } // namespace simdjson #endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS /* end file simdjson/generic/ondemand/std_deserialize.h for westmere */ // Inline definitions @@ -95044,7 +115608,7 @@ simdjson_inline simdjson_result array::begin() noexcept { simdjson_inline simdjson_result array::end() noexcept { return array_iterator(iter); } -simdjson_inline error_code array::consume() noexcept { +simdjson_warn_unused simdjson_warn_unused simdjson_inline error_code array::consume() noexcept { auto error = iter.json_iter().skip_child(iter.depth()-1); if(error) { iter.abandon(); } return error; @@ -95129,6 +115693,67 @@ inline simdjson_result array::at_path(std::string_view json_path) noexcep return at_pointer(json_pointer); } +inline simdjson_result> array::at_path_with_wildcard(std::string_view json_path) noexcept { + std::vector result; + + auto result_pair = get_next_key_and_json_path(json_path); + std::string_view key = result_pair.first; + std::string_view remaining_path = result_pair.second; + // Wildcard case + if(key=="*"){ + for(auto element: *this){ + + if(element.error()){ + return element.error(); + } + + if(remaining_path.empty()){ + // Use value_unsafe() because we've already checked for errors above. + // The 'element' is a simdjson_result wrapper, and we need to extract + // the underlying value. value_unsafe() is safe here because error() returned false. + result.push_back(std::move(element).value_unsafe()); + + }else{ + auto nested_result = element.at_path_with_wildcard(remaining_path); + + if(nested_result.error()){ + return nested_result.error(); + } + // Same logic as above. + std::vector nested_matches = std::move(nested_result).value_unsafe(); + + result.insert(result.end(), + std::make_move_iterator(nested_matches.begin()), + std::make_move_iterator(nested_matches.end())); + } + } + return result; + }else{ + // Specific index case in which we access the element at the given index + size_t idx=0; + + for(char c:key){ + if(c < '0' || c > '9'){ + return INVALID_JSON_POINTER; + } + idx = idx*10 + (c - '0'); + } + + auto element = at(idx); + + if(element.error()){ + return element.error(); + } + + if(remaining_path.empty()){ + result.push_back(std::move(element).value_unsafe()); + return result; + }else{ + return element.at_path_with_wildcard(remaining_path); + } + } +} + simdjson_inline simdjson_result array::at(size_t index) noexcept { size_t i = 0; for (auto value : *this) { @@ -95187,6 +115812,10 @@ simdjson_inline simdjson_result simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path_with_wildcard(json_path); +} simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { if (error()) { return error(); } return first.raw_json(); @@ -95235,6 +115864,9 @@ simdjson_inline array_iterator &array_iterator::operator++() noexcept { return *this; } +simdjson_inline bool array_iterator::at_end() const noexcept { + return iter.at_end(); +} } // namespace ondemand } // namespace westmere } // namespace simdjson @@ -95271,7 +115903,9 @@ simdjson_inline simdjson_result &simdjson_re ++(first); return *this; } - +simdjson_inline bool simdjson_result::at_end() const noexcept { + return !first.iter.is_valid() || first.at_end(); +} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H @@ -95328,7 +115962,7 @@ simdjson_inline simdjson_result value::get_string(bool allow_r return iter.get_string(allow_replacement); } template -simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_warn_unused simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { return iter.get_string(receiver, allow_replacement); } simdjson_inline simdjson_result value::get_wobbly_string() noexcept { @@ -95370,15 +116004,15 @@ template<> simdjson_inline simdjson_result value::get() noexcept { retu template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } -template<> simdjson_inline error_code value::get(array& out) noexcept { return get_array().get(out); } -template<> simdjson_inline error_code value::get(object& out) noexcept { return get_object().get(out); } -template<> simdjson_inline error_code value::get(raw_json_string& out) noexcept { return get_raw_json_string().get(out); } -template<> simdjson_inline error_code value::get(std::string_view& out) noexcept { return get_string(false).get(out); } -template<> simdjson_inline error_code value::get(number& out) noexcept { return get_number().get(out); } -template<> simdjson_inline error_code value::get(double& out) noexcept { return get_double().get(out); } -template<> simdjson_inline error_code value::get(uint64_t& out) noexcept { return get_uint64().get(out); } -template<> simdjson_inline error_code value::get(int64_t& out) noexcept { return get_int64().get(out); } -template<> simdjson_inline error_code value::get(bool& out) noexcept { return get_bool().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(array& out) noexcept { return get_array().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(object& out) noexcept { return get_object().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(raw_json_string& out) noexcept { return get_raw_json_string().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(std::string_view& out) noexcept { return get_string(false).get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(number& out) noexcept { return get_number().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(double& out) noexcept { return get_double().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(uint64_t& out) noexcept { return get_uint64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(int64_t& out) noexcept { return get_int64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(bool& out) noexcept { return get_bool().get(out); } #if SIMDJSON_EXCEPTIONS template @@ -95576,6 +116210,19 @@ simdjson_inline simdjson_result value::at_path(std::string_view json_path } } +inline simdjson_result> value::at_path_with_wildcard(std::string_view json_path) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path_with_wildcard(json_path); + case json_type::object: + return (*this).get_object().at_path_with_wildcard(json_path); + default: + return INVALID_JSON_POINTER; + } +} + } // namespace ondemand } // namespace westmere } // namespace simdjson @@ -95826,6 +116473,14 @@ simdjson_inline simdjson_result simdjson_result> simdjson_result::at_path_with_wildcard( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path_with_wildcard(json_path); +} + } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H @@ -95975,7 +116630,7 @@ simdjson_inline simdjson_result document::get_string(bool allo return get_root_value_iterator().get_root_string(true, allow_replacement); } template -simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_warn_unused simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept { return get_root_value_iterator().get_root_string(receiver, true, allow_replacement); } simdjson_inline simdjson_result document::get_wobbly_string() noexcept { @@ -96001,15 +116656,15 @@ template<> simdjson_inline simdjson_result document::get() & noexcept { template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } -template<> simdjson_inline error_code document::get(array& out) & noexcept { return get_array().get(out); } -template<> simdjson_inline error_code document::get(object& out) & noexcept { return get_object().get(out); } -template<> simdjson_inline error_code document::get(raw_json_string& out) & noexcept { return get_raw_json_string().get(out); } -template<> simdjson_inline error_code document::get(std::string_view& out) & noexcept { return get_string(false).get(out); } -template<> simdjson_inline error_code document::get(double& out) & noexcept { return get_double().get(out); } -template<> simdjson_inline error_code document::get(uint64_t& out) & noexcept { return get_uint64().get(out); } -template<> simdjson_inline error_code document::get(int64_t& out) & noexcept { return get_int64().get(out); } -template<> simdjson_inline error_code document::get(bool& out) & noexcept { return get_bool().get(out); } -template<> simdjson_inline error_code document::get(value& out) & noexcept { return get_value().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(array& out) & noexcept { return get_array().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(object& out) & noexcept { return get_object().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(raw_json_string& out) & noexcept { return get_raw_json_string().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(std::string_view& out) & noexcept { return get_string(false).get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(double& out) & noexcept { return get_double().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(uint64_t& out) & noexcept { return get_uint64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(int64_t& out) & noexcept { return get_int64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(bool& out) & noexcept { return get_bool().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(value& out) & noexcept { return get_value().get(out); } template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } @@ -96029,8 +116684,8 @@ simdjson_inline document::operator object() & noexcept(false) { return get_objec simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline document::operator double() noexcept(false) { return get_double(); } -simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } -simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator std::string_view() noexcept(false) simdjson_lifetime_bound { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) simdjson_lifetime_bound { return get_raw_json_string(); } simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } simdjson_inline document::operator value() noexcept(false) { return get_value(); } @@ -96072,1141 +116727,1961 @@ simdjson_inline simdjson_result document::find_field_unordered(std::strin simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { return start_or_resume_object().find_field_unordered(key); } -simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { - return start_or_resume_object()[key]; +simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { + return start_or_resume_object()[key]; +} + +simdjson_warn_unused simdjson_inline error_code document::consume() noexcept { + bool scalar = false; + auto error = is_scalar().get(scalar); + if(error) { return error; } + if(scalar) { + iter.return_current_and_advance(); + return SUCCESS; + } + error = iter.skip_child(0); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result document::raw_json() noexcept { + auto _iter = get_root_value_iterator(); + const uint8_t * starting_point{_iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter.unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result document::type() noexcept { + return get_root_value_iterator().type(); +} + +simdjson_inline simdjson_result document::is_scalar() noexcept { + // For more speed, we could do: + // return iter.is_single_token(); + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline simdjson_result document::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} + +simdjson_inline bool document::is_negative() noexcept { + return get_root_value_iterator().is_root_negative(); +} + +simdjson_inline simdjson_result document::is_integer() noexcept { + return get_root_value_iterator().is_root_integer(true); +} + +simdjson_inline simdjson_result document::get_number_type() noexcept { + return get_root_value_iterator().get_root_number_type(true); +} + +simdjson_inline simdjson_result document::get_number() noexcept { + return get_root_value_iterator().get_root_number(true); +} + + +simdjson_inline simdjson_result document::raw_json_token() noexcept { + auto _iter = get_root_value_iterator(); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_root_length()); +} + +simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_pointer.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result document::at_path(std::string_view json_path) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_path.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result> document::at_path_with_wildcard(std::string_view json_path) noexcept { + rewind(); // Rewind the document each time at_path_with_wildcard is called + if (json_path.empty()) { + return INVALID_JSON_POINTER; + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path_with_wildcard(json_path); + case json_type::object: + return (*this).get_object().at_path_with_wildcard(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code document::extract_into(T& out) & noexcept { + // Helper to check if a field name matches any of the requested fields + auto should_extract = [](std::string_view field_name) constexpr -> bool { + return ((FieldNames.view() == field_name) || ...); + }; + + // Iterate through all members of T using reflection + template for (constexpr auto mem : std::define_static_array( + std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + + // Only extract this field if it's in our list of requested fields + if constexpr (should_extract(key)) { + // Try to find and extract the field + if constexpr (concepts::optional_type) { + // For optional fields, it's ok if they're missing + auto field_result = find_field_unordered(key); + if (!field_result.error()) { + auto error = field_result.get(out.[:mem:]); + if (error && error != NO_SUCH_FIELD) { + return error; + } + } else if (field_result.error() != NO_SUCH_FIELD) { + return field_result.error(); + } else { + out.[:mem:].reset(); + } + } else { + // For required fields (in the requested list), fail if missing + SIMDJSON_TRY((*this)[key].get(out.[:mem:])); + } + } + } + }; + + return SUCCESS; +} + +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + westmere::ondemand::document &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base( + error + ) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first); +} +template<> simdjson_warn_unused simdjson_inline error_code simdjson_result::get(westmere::ondemand::document &out) & noexcept = delete; +template<> simdjson_warn_unused simdjson_inline error_code simdjson_result::get(westmere::ondemand::document &out) && noexcept { + if (error()) { return error(); } + out = std::forward(first); + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} + +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} + +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} + +simdjson_inline bool simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} + +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} + +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} + +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} + + +#if SIMDJSON_EXCEPTIONS +template ::value == false>::type> +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator westmere::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator westmere::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator westmere::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } -simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { - return start_or_resume_object()[key]; +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } - -simdjson_inline error_code document::consume() noexcept { - bool scalar = false; - auto error = is_scalar().get(scalar); - if(error) { return error; } - if(scalar) { - iter.return_current_and_advance(); - return SUCCESS; - } - error = iter.skip_child(0); - if(error) { iter.abandon(); } - return error; +simdjson_inline simdjson_result::operator westmere::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } +#endif -simdjson_inline simdjson_result document::raw_json() noexcept { - auto _iter = get_root_value_iterator(); - const uint8_t * starting_point{_iter.peek_start()}; - auto error = consume(); - if(error) { return error; } - // After 'consume()', we could be left pointing just beyond the document, but that - // is ok because we are not going to dereference the final pointer position, we just - // use it to compute the length in bytes. - const uint8_t * final_point{iter.unsafe_pointer()}; - return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); } -simdjson_inline simdjson_result document::type() noexcept { - return get_root_value_iterator().type(); +simdjson_inline bool simdjson_result::at_end() const noexcept { + if (error()) { return error(); } + return first.at_end(); } -simdjson_inline simdjson_result document::is_scalar() noexcept { - // For more speed, we could do: - // return iter.is_single_token(); - json_type this_type; - auto error = type().get(this_type); - if(error) { return error; } - return ! ((this_type == json_type::array) || (this_type == json_type::object)); + +simdjson_inline int32_t simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); } -simdjson_inline simdjson_result document::is_string() noexcept { - json_type this_type; - auto error = type().get(this_type); - if(error) { return error; } - return (this_type == json_type::string); +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); } -simdjson_inline bool document::is_negative() noexcept { - return get_root_value_iterator().is_root_negative(); +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); } -simdjson_inline simdjson_result document::is_integer() noexcept { - return get_root_value_iterator().is_root_integer(true); +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); } -simdjson_inline simdjson_result document::get_number_type() noexcept { - return get_root_value_iterator().get_root_number_type(true); +simdjson_inline simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path_with_wildcard(json_path); } -simdjson_inline simdjson_result document::get_number() noexcept { - return get_root_value_iterator().get_root_number(true); +#if SIMDJSON_STATIC_REFLECTION +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code simdjson_result::extract_into(T& out) & noexcept { + if (error()) { return error(); } + return first.extract_into(out); } +#endif // SIMDJSON_STATIC_REFLECTION +} // namespace simdjson -simdjson_inline simdjson_result document::raw_json_token() noexcept { - auto _iter = get_root_value_iterator(); - return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_root_length()); -} -simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { - rewind(); // Rewind the document each time at_pointer is called - if (json_pointer.empty()) { - return this->get_value(); - } - json_type t; - SIMDJSON_TRY(type().get(t)); - switch (t) - { - case json_type::array: - return (*this).get_array().at_pointer(json_pointer); - case json_type::object: - return (*this).get_object().at_pointer(json_pointer); - default: - return INVALID_JSON_POINTER; - } -} +namespace simdjson { +namespace westmere { +namespace ondemand { -simdjson_inline simdjson_result document::at_path(std::string_view json_path) noexcept { - rewind(); // Rewind the document each time at_pointer is called - if (json_path.empty()) { - return this->get_value(); - } - json_type t; - SIMDJSON_TRY(type().get(t)); - switch (t) { - case json_type::array: - return (*this).get_array().at_path(json_path); - case json_type::object: - return (*this).get_object().at_path(json_path); - default: - return INVALID_JSON_POINTER; - } +simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} +simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} +simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } +simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } +simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } +/** + * The document_reference instances are used primarily/solely for streams of JSON + * documents. + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. + * + * However, for streams of JSON documents, we want to be able to start from + * "321" "321" "321" + * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() + * successfully each time. + * + * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: + * this indicates that we allow trailing content. + */ +simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } +simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } +simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } +template +simdjson_warn_unused simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } +simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } +simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } +simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } +simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } +simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_value(); } +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline document_reference::operator T() noexcept(false) { return get(); } +simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } +simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } +simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } +simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } +simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } +#endif +simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } +simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } +simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } +simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } +simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } +simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } +simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_inline simdjson_result document_reference::is_string() noexcept { return doc->is_string(); } +simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } +simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } +simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } +simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } +simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } +simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } +simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } +simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_inline simdjson_result document_reference::at_path(std::string_view json_path) noexcept { return doc->at_path(json_path); } +simdjson_inline simdjson_result> document_reference::at_path_with_wildcard(std::string_view json_path) noexcept { return doc->at_path_with_wildcard(json_path); } +simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} +simdjson_inline document_reference::operator document&() const noexcept { return *doc; } +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code document_reference::extract_into(T& out) & noexcept { + return doc->extract_into(out); } - +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION } // namespace ondemand } // namespace westmere } // namespace simdjson + + namespace simdjson { +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::document_reference value, error_code error) + noexcept : implementation_simdjson_result_base(std::forward(value), error) {} -simdjson_inline simdjson_result::simdjson_result( - westmere::ondemand::document &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ -} -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base( - error - ) -{ -} -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } -simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { if (error()) { return error(); } return first.at(index); } -simdjson_inline error_code simdjson_result::rewind() noexcept { +simdjson_inline error_code simdjson_result::rewind() noexcept { if (error()) { return error(); } first.rewind(); return SUCCESS; } -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_inline simdjson_result simdjson_result::end() & noexcept { +simdjson_inline simdjson_result simdjson_result::end() & noexcept { return {}; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { if (error()) { return error(); } return first.get_array(); } -simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { if (error()) { return error(); } return first.get_object(); } -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_warn_unused simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } -simdjson_inline simdjson_result simdjson_result::get_value() noexcept { +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { if (error()) { return error(); } return first.get_value(); } -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +template <> +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(westmere::ondemand::document_reference &out) & noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} +template <> +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(westmere::ondemand::document_reference &out) && noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline simdjson_result::operator T() noexcept(false) { + static_assert(std::is_same::value == false, "You should not call get when T is a document"); + static_assert(std::is_same::value == false, "You should not call get when T is a document"); + if (error()) { throw simdjson_error(error()); } + return first.get(); +} +simdjson_inline simdjson_result::operator westmere::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator westmere::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator westmere::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator westmere::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} +simdjson_inline simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path_with_wildcard(json_path); +} +#if SIMDJSON_STATIC_REFLECTION +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code simdjson_result::extract_into(T& out) & noexcept { + if (error()) { return error(); } + return first.extract_into(out); +} +#endif // SIMDJSON_STATIC_REFLECTION +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H +/* end file simdjson/generic/ondemand/document-inl.h for westmere */ +/* including simdjson/generic/ondemand/document_stream-inl.h for westmere: #include "simdjson/generic/ondemand/document_stream-inl.h" */ +/* begin file simdjson/generic/ondemand/document_stream-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace westmere { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. + std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); +} + +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); +} + +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). + if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); +} + + +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. + can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } +} + +inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); +} + +#endif // SIMDJSON_THREADS_ENABLED + +simdjson_inline document_stream::document_stream( + ondemand::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size, + bool _allow_comma_separated +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? MINIMAL_BATCH_SIZE : _batch_size}, + allow_comma_separated{_allow_comma_separated}, + error{SUCCESS} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change + #endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif +} -template -simdjson_inline simdjson_result simdjson_result::get() & noexcept { - if (error()) { return error(); } - return first.get(); -} -template -simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first).get(); -} -template -simdjson_inline error_code simdjson_result::get(T &out) & noexcept { - if (error()) { return error(); } - return first.get(out); -} -template -simdjson_inline error_code simdjson_result::get(T &out) && noexcept { - if (error()) { return error(); } - return std::forward(first).get(out); +simdjson_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + allow_comma_separated{false}, + error{UNINITIALIZED} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) + #endif +{ } -template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; -template<> simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first); -} -template<> simdjson_inline error_code simdjson_result::get(westmere::ondemand::document &out) & noexcept = delete; -template<> simdjson_inline error_code simdjson_result::get(westmere::ondemand::document &out) && noexcept { - if (error()) { return error(); } - out = std::forward(first); - return SUCCESS; +simdjson_inline document_stream::~document_stream() noexcept +{ + #ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); + #endif } -simdjson_inline simdjson_result simdjson_result::type() noexcept { - if (error()) { return error(); } - return first.type(); +inline size_t document_stream::size_in_bytes() const noexcept { + return len; } -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { - if (error()) { return error(); } - return first.is_scalar(); +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; } -simdjson_inline simdjson_result simdjson_result::is_string() noexcept { - if (error()) { return error(); } - return first.is_string(); +simdjson_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { } -simdjson_inline bool simdjson_result::is_negative() noexcept { - if (error()) { return error(); } - return first.is_negative(); +simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { } -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { - if (error()) { return error(); } - return first.is_integer(); +simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { + return simdjson_result(stream->doc, stream->error); } -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { - if (error()) { return error(); } - return first.get_number_type(); +simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) + // + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. + // + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->error() is guarded against error conditions + // (it will immediately return if stream->error casts to false). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. + return *this; } -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { - if (error()) { return error(); } - return first.get_number(); +simdjson_inline bool document_stream::iterator::at_end() const noexcept { + return finished; } -#if SIMDJSON_EXCEPTIONS -template ::value == false>::type> -simdjson_inline simdjson_result::operator T() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator westmere::ondemand::array() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator westmere::ondemand::object() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator double() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; } -simdjson_inline simdjson_result::operator westmere::ondemand::raw_json_string() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline bool document_stream::iterator::operator==(const document_stream::iterator &other) const noexcept { + return finished == other.finished; } -simdjson_inline simdjson_result::operator bool() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished. + return iterator(this, error == EMPTY); } -simdjson_inline simdjson_result::operator westmere::ondemand::value() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); } -#endif +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->allocate(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } + doc_index = batch_start; + doc = document(json_iterator(&buf[batch_start], parser)); + doc.iter._streaming = true; -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { - if (error()) { return error(); } - return first.current_location(); + #ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread on next batch if needed + error = stage1_thread_parser.allocate(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } + } + #endif // SIMDJSON_THREADS_ENABLED } -simdjson_inline bool simdjson_result::at_end() const noexcept { - if (error()) { return error(); } - return first.at_end(); -} +inline void document_stream::next() noexcept { + // We always enter at once once in an error condition. + if (error) { return; } + next_document(); + if (error) { return; } + auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); + doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; + // Check if at end of structural indexes (i.e. at end of batch) + if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { + error = EMPTY; + // Load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + #ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } + #else + error = run_stage1(*parser, batch_start); + #endif + /** + * Whenever we move to another window, we need to update all pointers to make + * it appear as if the input buffer started at the beginning of the window. + * + * Take this input: + * + * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] + * + * Say you process the following window... + * + * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' + * + * When you do so, the json_iterator has a pointer at the beginning of the memory region + * (pointing at the beginning of '{"z"...'. + * + * When you move to the window that starts at... + * + * '[7, 10, 9] [15, 11, 12, 13] ... + * + * then it is not sufficient to just run stage 1. You also need to re-anchor the + * json_iterator so that it believes we are starting at '[7, 10, 9]...'. + * + * Under the DOM front-end, this gets done automatically because the parser owns + * the pointer the data, and when you call stage1 and then stage2 on the same + * parser, then stage2 will run on the pointer acquired by stage1. + * + * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that + * we used. But json_iterator has no callback when stage1 is called on the parser. + * In fact, I think that the parser is unaware of json_iterator. + * + * + * So we need to re-anchor the json_iterator after each call to stage 1 so that + * all of the pointers are in sync. + */ + doc.iter = json_iterator(&buf[batch_start], parser); + doc.iter._streaming = true; + /** + * End of resync. + */ -simdjson_inline int32_t simdjson_result::current_depth() const noexcept { - if (error()) { return error(); } - return first.current_depth(); + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. + doc_index = batch_start; + } + } } -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { - if (error()) { return error(); } - return first.raw_json_token(); +inline void document_stream::next_document() noexcept { + // Go to next place where depth=0 (document depth) + error = doc.iter.skip_child(0); + if (error) { return; } + // Always set depth=1 at the start of document + doc.iter._depth = 1; + // consume comma if comma separated is allowed + if (allow_comma_separated) { + error_code ignored = doc.iter.consume_character(','); + static_cast(ignored); // ignored on purpose + } + // Resets the string buffer at the beginning, thus invalidating the strings. + doc.iter._string_buf_loc = parser->string_buf.get(); + doc.iter._root = doc.iter.position(); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; } -simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { - if (error()) { return error(); } - return first.at_path(json_path); +inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { + // This code only updates the structural index in the parser, it does not update any json_iterator + // instance. + size_t remaining = len - _batch_start; + if (remaining <= batch_size) { + return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); + } else { + return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); + } } -} // namespace simdjson - +simdjson_inline size_t document_stream::iterator::current_index() const noexcept { + return stream->doc_index; +} -namespace simdjson { -namespace westmere { -namespace ondemand { +simdjson_inline std::string_view document_stream::iterator::source() const noexcept { + auto depth = stream->doc.iter.depth(); + auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); -simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} -simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} -simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } -simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } -simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } -/** - * The document_reference instances are used primarily/solely for streams of JSON - * documents. - * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should - * give an error, so we check for trailing content. - * - * However, for streams of JSON documents, we want to be able to start from - * "321" "321" "321" - * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() - * successfully each time. - * - * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: - * this indicates that we allow trailing content. - */ -simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } -simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } -simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } -simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } -simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } -simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } -simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } -template -simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } -simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } -simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } -simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } -simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } -simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_array(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_object(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_raw_json_string(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_string(false); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_double(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_uint64(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_int64(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_bool(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_value(); } -#if SIMDJSON_EXCEPTIONS -template -simdjson_inline document_reference::operator T() noexcept(false) { return get(); } -simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } -simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } -simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } -simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } -simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } -simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } -simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } -simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } -simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } -#endif -simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } -simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } -simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } -simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } -simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } -simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } -simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } -simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } -simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } -simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } -simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } -simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } -simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } -simdjson_inline simdjson_result document_reference::is_string() noexcept { return doc->is_string(); } -simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } -simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } -simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } -simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } -simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } -simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } -simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } -simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } -simdjson_inline simdjson_result document_reference::at_path(std::string_view json_path) noexcept { return doc->at_path(json_path); } -simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} -simdjson_inline document_reference::operator document&() const noexcept { return *doc; } + // If at root, process the first token to determine if scalar value + if (stream->doc.iter.at_root()) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': // Depth=1 already at start of document + break; + case '}': case ']': + depth--; + break; + default: // Scalar value document + // TODO: We could remove trailing whitespaces + // This returns a string spanning from start of value to the beginning of the next document (excluded) + { + auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; + // normally the length would be next_index - current_index() - 1, except for the last document + size_t svlen = next_index - current_index(); + const char *start = reinterpret_cast(stream->buf) + current_index(); + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { + svlen--; + } + return std::string_view(start, svlen); + } + } + cur_struct_index++; + } -} // namespace ondemand -} // namespace westmere -} // namespace simdjson + while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': + depth++; + break; + case '}': case ']': + depth--; + break; + } + if (depth == 0) { break; } + cur_struct_index++; + } + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; +} +inline error_code document_stream::iterator::error() const noexcept { + return stream->error; +} -namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::document_reference value, error_code error) - noexcept : implementation_simdjson_result_base(std::forward(value), error) {} +#ifdef SIMDJSON_THREADS_ENABLED +inline void document_stream::load_from_stage1_thread() noexcept { + worker->finish(); + // Swap to the parser that was loaded up in the thread. Make sure the parser has + // enough memory to swap to, as well. + std::swap(stage1_thread_parser,*parser); + error = stage1_thread_error; + if (error) { return; } -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { - if (error()) { return error(); } - return first.count_elements(); -} -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { - if (error()) { return error(); } - return first.count_fields(); -} -simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { - if (error()) { return error(); } - return first.at(index); -} -simdjson_inline error_code simdjson_result::rewind() noexcept { - if (error()) { return error(); } - first.rewind(); - return SUCCESS; -} -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { - if (error()) { return error(); } - return first.begin(); -} -simdjson_inline simdjson_result simdjson_result::end() & noexcept { - return {}; -} -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); -} -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); -} -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { - if (error()) { return error(); } - return first[key]; -} -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { - if (error()) { return error(); } - return first[key]; -} -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); -} -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); -} -simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { - if (error()) { return error(); } - return first.get_array(); + // If there's anything left, start the stage 1 thread! + if (next_batch_start() < len) { + start_stage1_thread(); + } } -simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { - if (error()) { return error(); } - return first.get_object(); + +inline void document_stream::start_stage1_thread() noexcept { + // we call the thread on a lambda that will update + // this->stage1_thread_error + // there is only one thread that may write to this value + // TODO this is NOT exception-safe. + this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); + + worker->run(this, & this->stage1_thread_parser, _next_batch_start); } -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { - if (error()) { return error(); } - return first.get_uint64(); + +#endif // SIMDJSON_THREADS_ENABLED + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ } -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { - if (error()) { return error(); } - return first.get_uint64_in_string(); +simdjson_inline simdjson_result::simdjson_result( + westmere::ondemand::document_stream &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ } -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { - if (error()) { return error(); } - return first.get_int64(); + } -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { - if (error()) { return error(); } - return first.get_int64_in_string(); + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H +/* end file simdjson/generic/ondemand/document_stream-inl.h for westmere */ +/* including simdjson/generic/ondemand/field-inl.h for westmere: #include "simdjson/generic/ondemand/field-inl.h" */ +/* begin file simdjson/generic/ondemand/field-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +// clang 6 does not think the default constructor can be noexcept, so we make it explicit +simdjson_inline field::field() noexcept : std::pair() {} + +simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept + : std::pair(key, std::forward(value)) +{ } -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { - if (error()) { return error(); } - return first.get_double(); + +simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { + raw_json_string key; + SIMDJSON_TRY( parent_iter.field_key().get(key) ); + SIMDJSON_TRY( parent_iter.field_value() ); + return field::start(parent_iter, key); } -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { - if (error()) { return error(); } - return first.get_double_in_string(); + +simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { + return field(key, parent_iter.child()); } -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.get_string(allow_replacement); + +simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); + first.consume(); + return answer; } + template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.get_string(receiver, allow_replacement); -} -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { - if (error()) { return error(); } - return first.get_wobbly_string(); +simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { + std::string_view key; + SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); + receiver = key; + return SUCCESS; } -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { - if (error()) { return error(); } - return first.get_raw_json_string(); + +simdjson_inline raw_json_string field::key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return first; } -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { - if (error()) { return error(); } - return first.get_bool(); + + +simdjson_inline std::string_view field::key_raw_json_token() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return std::string_view(reinterpret_cast(first.buf-1), second.iter._json_iter->token.peek(-1) - first.buf + 1); } -simdjson_inline simdjson_result simdjson_result::get_value() noexcept { - if (error()) { return error(); } - return first.get_value(); + +simdjson_inline std::string_view field::escaped_key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + auto end_quote = second.iter._json_iter->token.peek(-1); + while(*end_quote != '"') end_quote--; + return std::string_view(reinterpret_cast(first.buf), end_quote - first.buf); } -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { - if (error()) { return error(); } - return first.is_null(); + +simdjson_inline value &field::value() & noexcept { + return second; } -template -simdjson_inline simdjson_result simdjson_result::get() & noexcept { - if (error()) { return error(); } - return first.get(); + +simdjson_inline value field::value() && noexcept { + return std::forward(*this).second; } -template -simdjson_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first).get(); + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + westmere::ondemand::field &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ } -template -simdjson_inline error_code simdjson_result::get(T &out) & noexcept { - if (error()) { return error(); } - return first.get(out); +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ } -template -simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + +simdjson_inline simdjson_result simdjson_result::key() noexcept { if (error()) { return error(); } - return std::forward(first).get(out); + return first.key(); } -simdjson_inline simdjson_result simdjson_result::type() noexcept { + +simdjson_inline simdjson_result simdjson_result::key_raw_json_token() noexcept { if (error()) { return error(); } - return first.type(); + return first.key_raw_json_token(); } -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + +simdjson_inline simdjson_result simdjson_result::escaped_key() noexcept { if (error()) { return error(); } - return first.is_scalar(); + return first.escaped_key(); } -simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + +simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { if (error()) { return error(); } - return first.is_string(); + return first.unescaped_key(allow_replacement); } -template <> -simdjson_inline error_code simdjson_result::get(westmere::ondemand::document_reference &out) & noexcept { + +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { if (error()) { return error(); } - out = first; - return SUCCESS; + return first.unescaped_key(receiver, allow_replacement); } -template <> -simdjson_inline error_code simdjson_result::get(westmere::ondemand::document_reference &out) && noexcept { + +simdjson_inline simdjson_result simdjson_result::value() noexcept { if (error()) { return error(); } - out = first; - return SUCCESS; + return std::move(first.value()); } -simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { - if (error()) { return error(); } - return first.is_negative(); + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +/* end file simdjson/generic/ondemand/field-inl.h for westmere */ +/* including simdjson/generic/ondemand/json_iterator-inl.h for westmere: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/json_iterator-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept + : token(std::forward(other.token)), + parser{other.parser}, + _string_buf_loc{other._string_buf_loc}, + error{other.error}, + _depth{other._depth}, + _root{other._root}, + _streaming{other._streaming} +{ + other.parser = nullptr; } -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { - if (error()) { return error(); } - return first.is_integer(); +simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { + token = other.token; + parser = other.parser; + _string_buf_loc = other._string_buf_loc; + error = other.error; + _depth = other._depth; + _root = other._root; + _streaming = other._streaming; + other.parser = nullptr; + return *this; } -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { - if (error()) { return error(); } - return first.get_number_type(); + +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{false} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif } -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { - if (error()) { return error(); } - return first.get_number(); + +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{streaming} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif } -#if SIMDJSON_EXCEPTIONS -template -simdjson_inline simdjson_result::operator T() noexcept(false) { - static_assert(std::is_same::value == false, "You should not call get when T is a document"); - static_assert(std::is_same::value == false, "You should not call get when T is a document"); - if (error()) { throw simdjson_error(error()); } - return first.get(); +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + +inline void json_iterator::rewind() noexcept { + token.set_position( root_position() ); + logger::log_headers(); // We start again + _string_buf_loc = parser->string_buf.get(); + _depth = 1; } -simdjson_inline simdjson_result::operator westmere::ondemand::array() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +inline bool json_iterator::balanced() const noexcept { + token_iterator ti(token); + int32_t count{0}; + ti.set_position( root_position() ); + while(ti.peek() <= peek_last()) { + switch (*ti.return_current_and_advance()) + { + case '[': case '{': + count++; + break; + case ']': case '}': + count--; + break; + default: + break; + } + } + return count == 0; } -simdjson_inline simdjson_result::operator westmere::ondemand::object() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and parent_depth, which is a desired effect. The warning does not show up if the +// skip_child() function is not marked inline). +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { + if (depth() <= parent_depth) { return SUCCESS; } + switch (*return_current_and_advance()) { + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + + // For the first open array/object in a value, we've already incremented depth, so keep it the same + // We never stop at colon, but if we did, it wouldn't affect depth + case '[': case '{': case ':': + logger::log_start_value(*this, "skip"); + break; + // If there is a comma, we have just finished a value in an array/object, and need to get back in + case ',': + logger::log_value(*this, "skip"); + break; + // ] or } means we just finished a value and need to jump out of the array/object + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } +#if SIMDJSON_CHECK_EOF + // If there are no more tokens, the parent is incomplete. + if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + break; + case '"': + if(*peek() == ':') { + // We are at a key!!! + // This might happen if you just started an object and you skip it immediately. + // Performance note: it would be nice to get rid of this check as it is somewhat + // expensive. + // https://github.com/simdjson/simdjson/issues/1742 + logger::log_value(*this, "key"); + return_current_and_advance(); // eat up the ':' + break; // important!!! + } + simdjson_fallthrough; + // Anything else must be a scalar value + default: + // For the first scalar, we will have incremented depth already, so we decrement it here. + logger::log_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + } + + // Now that we've considered the first value, we only increment/decrement for arrays/objects + while (position() < end_position()) { + switch (*return_current_and_advance()) { + case '[': case '{': + logger::log_start_value(*this, "skip"); + _depth++; + break; + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + default: + logger::log_value(*this, "skip", ""); + break; + } + } + + return report_error(TAPE_ERROR, "not enough close braces"); } -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool json_iterator::at_root() const noexcept { + return position() == root_position(); } -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline bool json_iterator::is_single_token() const noexcept { + return parser->implementation->n_structural_indexes == 1; } -simdjson_inline simdjson_result::operator double() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline bool json_iterator::streaming() const noexcept { + return _streaming; } -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline token_position json_iterator::root_position() const noexcept { + return _root; } -simdjson_inline simdjson_result::operator westmere::ondemand::raw_json_string() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); } -simdjson_inline simdjson_result::operator bool() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline void json_iterator::assert_at_root() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument + // has side effects that will be discarded. + SIMDJSON_ASSUME( token.position() == _root ); +#endif } -simdjson_inline simdjson_result::operator westmere::ondemand::value() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { + assert_valid_position(token._position + required_tokens - 1); } -#endif -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { - if (error()) { return error(); } - return first.current_location(); +simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); + SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#else + (void)position; // Suppress unused parameter warning +#endif } -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { - if (error()) { return error(); } - return first.raw_json_token(); +simdjson_inline bool json_iterator::at_end() const noexcept { + return position() == end_position(); +} +simdjson_inline token_position json_iterator::end_position() const noexcept { + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + return &parser->implementation->structural_indexes[n_structural_indexes]; } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); +inline std::string json_iterator::to_string() const noexcept { + if( !is_alive() ) { return "dead json_iterator instance"; } + const char * current_structural = reinterpret_cast(token.peek()); + return std::string("json_iterator [ depth : ") + std::to_string(_depth) + + std::string(", structural : '") + std::string(current_structural,1) + + std::string("', offset : ") + std::to_string(token.current_offset()) + + std::string("', error : ") + error_message(error) + + std::string(" ]"); } -simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { - if (error()) { - return error(); +inline simdjson_result json_iterator::current_location() const noexcept { + if (!is_alive()) { // Unrecoverable error + if (!at_root()) { + return reinterpret_cast(token.peek(-1)); + } else { + return reinterpret_cast(token.peek()); + } } - return first.at_path(json_path); + if (at_end()) { + return OUT_OF_BOUNDS; + } + return reinterpret_cast(token.peek()); } -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H -/* end file simdjson/generic/ondemand/document-inl.h for westmere */ -/* including simdjson/generic/ondemand/document_stream-inl.h for westmere: #include "simdjson/generic/ondemand/document_stream-inl.h" */ -/* begin file simdjson/generic/ondemand/document_stream-inl.h for westmere */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -#include -#include +simdjson_inline bool json_iterator::is_alive() const noexcept { + return parser; +} -namespace simdjson { -namespace westmere { -namespace ondemand { +simdjson_inline void json_iterator::abandon() noexcept { + parser = nullptr; + _depth = 0; +} -#ifdef SIMDJSON_THREADS_ENABLED +simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif // SIMDJSON_CHECK_EOF + return token.return_current_and_advance(); +} -inline void stage1_worker::finish() { - // After calling "run" someone would call finish() to wait - // for the end of the processing. - // This function will wait until either the thread has done - // the processing or, else, the destructor has been called. - std::unique_lock lock(locking_mutex); - cond_var.wait(lock, [this]{return has_work == false;}); +simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { + // deliberately done without safety guard: + return token.peek(); } -inline stage1_worker::~stage1_worker() { - // The thread may never outlive the stage1_worker instance - // and will always be stopped/joined before the stage1_worker - // instance is gone. - stop_thread(); +simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // SIMDJSON_CHECK_EOF + return token.peek(delta); } -inline void stage1_worker::start_thread() { - std::unique_lock lock(locking_mutex); - if(thread.joinable()) { - return; // This should never happen but we never want to create more than one thread. - } - thread = std::thread([this]{ - while(true) { - std::unique_lock thread_lock(locking_mutex); - // We wait for either "run" or "stop_thread" to be called. - cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); - // If, for some reason, the stop_thread() method was called (i.e., the - // destructor of stage1_worker is called, then we want to immediately destroy - // the thread (and not do any more processing). - if(!can_work) { - break; - } - this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, - this->_next_batch_start); - this->has_work = false; - // The condition variable call should be moved after thread_lock.unlock() for performance - // reasons but thread sanitizers may report it as a data race if we do. - // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock - cond_var.notify_one(); // will notify "finish" - thread_lock.unlock(); - } - } - ); +simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // #if SIMDJSON_CHECK_EOF + return token.peek_length(delta); } +simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { + // todo: currently we require end-of-string buffering, but the following + // assert_valid_position should be turned on if/when we lift that condition. + // assert_valid_position(position); + // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF + // is ON by default, we have no choice but to disable it for real with a comment. + return token.peek(position); +} -inline void stage1_worker::stop_thread() { - std::unique_lock lock(locking_mutex); - // We have to make sure that all locks can be released. - can_work = false; - has_work = false; - cond_var.notify_all(); - lock.unlock(); - if(thread.joinable()) { - thread.join(); - } +simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_length(position); +} +simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_root_length(position); } -inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { - std::unique_lock lock(locking_mutex); - owner = ds; - _next_batch_start = next_batch_start; - stage1_thread_parser = stage1; - has_work = true; - // The condition variable call should be moved after thread_lock.unlock() for performance - // reasons but thread sanitizers may report it as a data race if we do. - // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock - cond_var.notify_one(); // will notify the thread lock that we have work - lock.unlock(); +simdjson_inline token_position json_iterator::last_position() const noexcept { + // The following line fails under some compilers... + // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); + // since it has side-effects. + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + SIMDJSON_ASSUME(n_structural_indexes > 0); + return &parser->implementation->structural_indexes[n_structural_indexes - 1]; +} +simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { + return token.peek(last_position()); } -#endif // SIMDJSON_THREADS_ENABLED +simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { + SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); + SIMDJSON_ASSUME(_depth == parent_depth + 1); + _depth = parent_depth; +} -simdjson_inline document_stream::document_stream( - ondemand::parser &_parser, - const uint8_t *_buf, - size_t _len, - size_t _batch_size, - bool _allow_comma_separated -) noexcept - : parser{&_parser}, - buf{_buf}, - len{_len}, - batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? MINIMAL_BATCH_SIZE : _batch_size}, - allow_comma_separated{_allow_comma_separated}, - error{SUCCESS} - #ifdef SIMDJSON_THREADS_ENABLED - , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change - #endif -{ -#ifdef SIMDJSON_THREADS_ENABLED - if(worker.get() == nullptr) { - error = MEMALLOC; - } -#endif +simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); + _depth = child_depth; } -simdjson_inline document_stream::document_stream() noexcept - : parser{nullptr}, - buf{nullptr}, - len{0}, - batch_size{0}, - allow_comma_separated{false}, - error{UNINITIALIZED} - #ifdef SIMDJSON_THREADS_ENABLED - , use_thread(false) - #endif -{ +simdjson_inline depth_t json_iterator::depth() const noexcept { + return _depth; } -simdjson_inline document_stream::~document_stream() noexcept -{ - #ifdef SIMDJSON_THREADS_ENABLED - worker.reset(); - #endif +simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { + return _string_buf_loc; } -inline size_t document_stream::size_in_bytes() const noexcept { - return len; +simdjson_warn_unused simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); + logger::log_error(*this, message); + error = _error; + return error; } -inline size_t document_stream::truncated_bytes() const noexcept { - if(error == CAPACITY) { return len - batch_start; } - return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; +simdjson_inline token_position json_iterator::position() const noexcept { + return token.position(); } -simdjson_inline document_stream::iterator::iterator() noexcept - : stream{nullptr}, finished{true} { +simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape(in, _string_buf_loc, allow_replacement); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else + return parser->unescape(in, _string_buf_loc, allow_replacement); +#endif } -simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept - : stream{_stream}, finished{is_end} { +simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape_wobbly(in, _string_buf_loc); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else + return parser->unescape_wobbly(in, _string_buf_loc); +#endif } -simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { - return simdjson_result(stream->doc, stream->error); +simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); +#if SIMDJSON_DEVELOPMENT_CHECKS +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); + SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); +#endif +#endif + token.set_position(position); + _depth = child_depth; } -simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { - // If there is an error, then we want the iterator - // to be finished, no matter what. (E.g., we do not - // keep generating documents with errors, or go beyond - // a document with errors.) - // - // Users do not have to call "operator*()" when they use operator++, - // so we need to end the stream in the operator++ function. - // - // Note that setting finished = true is essential otherwise - // we would enter an infinite loop. - if (stream->error) { finished = true; } - // Note that stream->error() is guarded against error conditions - // (it will immediately return if stream->error casts to false). - // In effect, this next function does nothing when (stream->error) - // is true (hence the risk of an infinite loop). - stream->next(); - // If that was the last document, we're finished. - // It is the only type of error we do not want to appear - // in operator*. - if (stream->error == EMPTY) { finished = true; } - // If we had any other kind of error (not EMPTY) then we want - // to pass it along to the operator* and we cannot mark the result - // as "finished" just yet. - return *this; +simdjson_warn_unused simdjson_inline error_code json_iterator::consume_character(char c) noexcept { + if (*peek() == c) { + return_current_and_advance(); + return SUCCESS; + } + return TAPE_ERROR; } -simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { - return finished != other.finished; +#if SIMDJSON_DEVELOPMENT_CHECKS + +simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; } -simdjson_inline document_stream::iterator document_stream::begin() noexcept { - start(); - // If there are no documents, we're finished. - return iterator(this, error == EMPTY); +simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } } -simdjson_inline document_stream::iterator document_stream::end() noexcept { - return iterator(this, true); +#endif + + +simdjson_warn_unused simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); + logger::log_error(*this, message); + return _error; } -inline void document_stream::start() noexcept { - if (error) { return; } - error = parser->allocate(batch_size); - if (error) { return; } - // Always run the first stage 1 parse immediately - batch_start = 0; - error = run_stage1(*parser, batch_start); - while(error == EMPTY) { - // In exceptional cases, we may start with an empty block - batch_start = next_batch_start(); - if (batch_start >= len) { return; } - error = run_stage1(*parser, batch_start); - } - if (error) { return; } - doc_index = batch_start; - doc = document(json_iterator(&buf[batch_start], parser)); - doc.iter._streaming = true; - #ifdef SIMDJSON_THREADS_ENABLED - if (use_thread && next_batch_start() < len) { - // Kick off the first thread on next batch if needed - error = stage1_thread_parser.allocate(batch_size); - if (error) { return; } - worker->start_thread(); - start_stage1_thread(); - if (error) { return; } +simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { + // This function is not expected to be called in performance-sensitive settings. + // Let us guard against silly cases: + if((N < max_len) || (N == 0)) { return false; } + // Copy to the buffer. + std::memcpy(tmpbuf, json, max_len); + if(N > max_len) { // We pad whatever remains with ' '. + std::memset(tmpbuf + max_len, ' ', N - max_len); } - #endif // SIMDJSON_THREADS_ENABLED + return true; } -inline void document_stream::next() noexcept { - // We always enter at once once in an error condition. - if (error) { return; } - next_document(); - if (error) { return; } - auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); - doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; +} // namespace ondemand +} // namespace westmere +} // namespace simdjson - // Check if at end of structural indexes (i.e. at end of batch) - if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { - error = EMPTY; - // Load another batch (if available) - while (error == EMPTY) { - batch_start = next_batch_start(); - if (batch_start >= len) { break; } - #ifdef SIMDJSON_THREADS_ENABLED - if(use_thread) { - load_from_stage1_thread(); - } else { - error = run_stage1(*parser, batch_start); - } - #else - error = run_stage1(*parser, batch_start); - #endif - /** - * Whenever we move to another window, we need to update all pointers to make - * it appear as if the input buffer started at the beginning of the window. - * - * Take this input: - * - * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] - * - * Say you process the following window... - * - * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' - * - * When you do so, the json_iterator has a pointer at the beginning of the memory region - * (pointing at the beginning of '{"z"...'. - * - * When you move to the window that starts at... - * - * '[7, 10, 9] [15, 11, 12, 13] ... - * - * then it is not sufficient to just run stage 1. You also need to re-anchor the - * json_iterator so that it believes we are starting at '[7, 10, 9]...'. - * - * Under the DOM front-end, this gets done automatically because the parser owns - * the pointer the data, and when you call stage1 and then stage2 on the same - * parser, then stage2 will run on the pointer acquired by stage1. - * - * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that - * we used. But json_iterator has no callback when stage1 is called on the parser. - * In fact, I think that the parser is unaware of json_iterator. - * - * - * So we need to re-anchor the json_iterator after each call to stage 1 so that - * all of the pointers are in sync. - */ - doc.iter = json_iterator(&buf[batch_start], parser); - doc.iter._streaming = true; - /** - * End of resync. - */ +namespace simdjson { - if (error) { continue; } // If the error was EMPTY, we may want to load another batch. - doc_index = batch_start; +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::json_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/json_iterator-inl.h for westmere */ +/* including simdjson/generic/ondemand/json_type-inl.h for westmere: #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* begin file simdjson/generic/ondemand/json_type-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { + switch (type) { + case json_type::array: out << "array"; break; + case json_type::object: out << "object"; break; + case json_type::number: out << "number"; break; + case json_type::string: out << "string"; break; + case json_type::boolean: out << "boolean"; break; + case json_type::null: out << "null"; break; + default: SIMDJSON_UNREACHABLE(); } - } + return out; } -inline void document_stream::next_document() noexcept { - // Go to next place where depth=0 (document depth) - error = doc.iter.skip_child(0); - if (error) { return; } - // Always set depth=1 at the start of document - doc.iter._depth = 1; - // consume comma if comma separated is allowed - if (allow_comma_separated) { doc.iter.consume_character(','); } - // Resets the string buffer at the beginning, thus invalidating the strings. - doc.iter._string_buf_loc = parser->string_buf.get(); - doc.iter._root = doc.iter.position(); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { + return out << type.value(); } +#endif -inline size_t document_stream::next_batch_start() const noexcept { - return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; + + +simdjson_inline number_type number::get_number_type() const noexcept { + return type; } -inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { - // This code only updates the structural index in the parser, it does not update any json_iterator - // instance. - size_t remaining = len - _batch_start; - if (remaining <= batch_size) { - return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); - } else { - return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); - } +simdjson_inline bool number::is_uint64() const noexcept { + return get_number_type() == number_type::unsigned_integer; } -simdjson_inline size_t document_stream::iterator::current_index() const noexcept { - return stream->doc_index; +simdjson_inline uint64_t number::get_uint64() const noexcept { + return payload.unsigned_integer; } -simdjson_inline std::string_view document_stream::iterator::source() const noexcept { - auto depth = stream->doc.iter.depth(); - auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); +simdjson_inline number::operator uint64_t() const noexcept { + return get_uint64(); +} - // If at root, process the first token to determine if scalar value - if (stream->doc.iter.at_root()) { - switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { - case '{': case '[': // Depth=1 already at start of document - break; - case '}': case ']': - depth--; - break; - default: // Scalar value document - // TODO: We could remove trailing whitespaces - // This returns a string spanning from start of value to the beginning of the next document (excluded) - { - auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; - // normally the length would be next_index - current_index() - 1, except for the last document - size_t svlen = next_index - current_index(); - const char *start = reinterpret_cast(stream->buf) + current_index(); - while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { - svlen--; - } - return std::string_view(start, svlen); - } - } - cur_struct_index++; - } +simdjson_inline bool number::is_int64() const noexcept { + return get_number_type() == number_type::signed_integer; +} - while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { - switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { - case '{': case '[': - depth++; - break; - case '}': case ']': - depth--; - break; - } - if (depth == 0) { break; } - cur_struct_index++; - } +simdjson_inline int64_t number::get_int64() const noexcept { + return payload.signed_integer; +} - return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; +simdjson_inline number::operator int64_t() const noexcept { + return get_int64(); } -inline error_code document_stream::iterator::error() const noexcept { - return stream->error; +simdjson_inline bool number::is_double() const noexcept { + return get_number_type() == number_type::floating_point_number; } -#ifdef SIMDJSON_THREADS_ENABLED +simdjson_inline double number::get_double() const noexcept { + return payload.floating_point_number; +} -inline void document_stream::load_from_stage1_thread() noexcept { - worker->finish(); - // Swap to the parser that was loaded up in the thread. Make sure the parser has - // enough memory to swap to, as well. - std::swap(stage1_thread_parser,*parser); - error = stage1_thread_error; - if (error) { return; } +simdjson_inline number::operator double() const noexcept { + return get_double(); +} - // If there's anything left, start the stage 1 thread! - if (next_batch_start() < len) { - start_stage1_thread(); +simdjson_inline double number::as_double() const noexcept { + if(is_double()) { + return payload.floating_point_number; + } + if(is_int64()) { + return double(payload.signed_integer); } + return double(payload.unsigned_integer); +} + +simdjson_inline void number::append_s64(int64_t value) noexcept { + payload.signed_integer = value; + type = number_type::signed_integer; } -inline void document_stream::start_stage1_thread() noexcept { - // we call the thread on a lambda that will update - // this->stage1_thread_error - // there is only one thread that may write to this value - // TODO this is NOT exception-safe. - this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error - size_t _next_batch_start = this->next_batch_start(); +simdjson_inline void number::append_u64(uint64_t value) noexcept { + payload.unsigned_integer = value; + type = number_type::unsigned_integer; +} - worker->run(this, & this->stage1_thread_parser, _next_batch_start); +simdjson_inline void number::append_double(double value) noexcept { + payload.floating_point_number = value; + type = number_type::floating_point_number; } -#endif // SIMDJSON_THREADS_ENABLED +simdjson_inline void number::skip_double() noexcept { + type = number_type::floating_point_number; +} } // namespace ondemand } // namespace westmere @@ -97214,588 +118689,719 @@ inline void document_stream::start_stage1_thread() noexcept { namespace simdjson { -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base(error) -{ -} -simdjson_inline simdjson_result::simdjson_result( - westmere::ondemand::document_stream &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ -} +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::json_type &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} -} +} // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H -/* end file simdjson/generic/ondemand/document_stream-inl.h for westmere */ -/* including simdjson/generic/ondemand/field-inl.h for westmere: #include "simdjson/generic/ondemand/field-inl.h" */ -/* begin file simdjson/generic/ondemand/field-inl.h for westmere */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +/* end file simdjson/generic/ondemand/json_type-inl.h for westmere */ +/* including simdjson/generic/ondemand/logger-inl.h for westmere: #include "simdjson/generic/ondemand/logger-inl.h" */ +/* begin file simdjson/generic/ondemand/logger-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#include +#include + namespace simdjson { namespace westmere { namespace ondemand { +namespace logger { -// clang 6 does not think the default constructor can be noexcept, so we make it explicit -simdjson_inline field::field() noexcept : std::pair() {} +static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; +static constexpr const int LOG_EVENT_LEN = 20; +static constexpr const int LOG_BUFFER_LEN = 30; +static constexpr const int LOG_SMALL_BUFFER_LEN = 10; +static int log_depth = 0; // Not threadsafe. Log only. -simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept - : std::pair(key, std::forward(value)) -{ +// Helper to turn unprintable or newline characters into spaces +static inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } } -simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { - raw_json_string key; - SIMDJSON_TRY( parent_iter.field_key().get(key) ); - SIMDJSON_TRY( parent_iter.field_value() ); - return field::start(parent_iter, key); +template +static inline std::string string_format(const std::string& format, const Args&... args) +{ + SIMDJSON_PUSH_DISABLE_ALL_WARNINGS + int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; + auto size = static_cast(size_s); + if (size <= 0) return std::string(); + std::unique_ptr buf(new char[size]); + std::snprintf(buf.get(), size, format.c_str(), args...); + SIMDJSON_POP_DISABLE_WARNINGS + return std::string(buf.get(), buf.get() + size - 1); } -simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { - return field(key, parent_iter.child()); +static inline log_level get_log_level_from_env() +{ + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + char *lvl = getenv("SIMDJSON_LOG_LEVEL"); + SIMDJSON_POP_DISABLE_WARNINGS + if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } + return log_level::info; } -simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. - simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); - first.consume(); - return answer; +static inline log_level log_threshold() +{ + static log_level threshold = get_log_level_from_env(); + return threshold; } -template -simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { - std::string_view key; - SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); - receiver = key; - return SUCCESS; +static inline bool should_log(log_level level) +{ + return level >= log_threshold(); } -simdjson_inline raw_json_string field::key() const noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. - return first; +inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); } - -simdjson_inline std::string_view field::key_raw_json_token() const noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. - return std::string_view(reinterpret_cast(first.buf-1), second.iter._json_iter->token.peek(-1) - first.buf + 1); +inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "", type, detail, log_level::info); } - -simdjson_inline std::string_view field::escaped_key() const noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. - auto end_quote = second.iter._json_iter->token.peek(-1); - while(*end_quote != '"') end_quote--; - return std::string_view(reinterpret_cast(first.buf), end_quote - first.buf); +inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); } -simdjson_inline value &field::value() & noexcept { - return second; +inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "+", type, detail, log_level::info); + if (LOG_ENABLED) { log_depth++; } } - -simdjson_inline value field::value() && noexcept { - return std::forward(*this).second; +inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_line(iter, "+", type, "", delta, depth_delta, log_level::info); + if (LOG_ENABLED) { log_depth++; } } -} // namespace ondemand -} // namespace westmere -} // namespace simdjson +inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + if (LOG_ENABLED) { log_depth--; } + log_line(iter, "-", type, "", delta, depth_delta, log_level::info); +} -namespace simdjson { +inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); +} +inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { + log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); +} -simdjson_inline simdjson_result::simdjson_result( - westmere::ondemand::field &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ +inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_event(iter.json_iter(), type, detail, delta, depth_delta); } -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base(error) -{ + +inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_value(iter.json_iter(), type, detail, delta, depth_delta); } -simdjson_inline simdjson_result simdjson_result::key() noexcept { - if (error()) { return error(); } - return first.key(); +inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_start_value(iter.json_iter(), type, delta, depth_delta); } -simdjson_inline simdjson_result simdjson_result::key_raw_json_token() noexcept { - if (error()) { return error(); } - return first.key_raw_json_token(); +inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_end_value(iter.json_iter(), type, delta, depth_delta); } -simdjson_inline simdjson_result simdjson_result::escaped_key() noexcept { - if (error()) { return error(); } - return first.escaped_key(); +inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_error(iter.json_iter(), error, detail, delta, depth_delta); } -simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.unescaped_key(allow_replacement); +inline void log_headers() noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(log_level::info))) { + // Technically a static variable is not thread-safe, but if you are using threads and logging... well... + static bool displayed_hint{false}; + log_depth = 0; + printf("\n"); + if (!displayed_hint) { + // We only print this helpful header once. + printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); + printf("# +array says 'this is where we were when we discovered the start array'\n"); + printf( + "# -array says 'this is where we were when we ended the array'\n"); + printf("# skip says 'this is a structural or value I am skipping'\n"); + printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); + printf("#\n"); + printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); + printf("# in addition to the depth being displayed.\n"); + printf("#\n"); + printf("# Every token in the document has a single depth determined by the tokens before it,\n"); + printf("# and is not affected by what the token actually is.\n"); + printf("#\n"); + printf("# Not all structural elements are presented as tokens in the logs.\n"); + printf("#\n"); + printf("# We never give control to the user within an empty array or an empty object.\n"); + printf("#\n"); + printf("# Inside an array, having a depth greater than the array's depth means that\n"); + printf("# we are pointing inside a value.\n"); + printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); + printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); + displayed_hint = true; + } + printf("\n"); + printf("| %-*s ", LOG_EVENT_LEN, "Event"); + printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); + printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); + // printf("| %-*s ", 5, "Next#"); + printf("| %-*s ", 5, "Depth"); + printf("| Detail "); + printf("|\n"); + + printf("|%.*s", LOG_EVENT_LEN + 2, DASHES); + printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); + printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); + // printf("|%.*s", 5+2, DASHES); + printf("|%.*s", 5 + 2, DASHES); + printf("|--------"); + printf("|\n"); + fflush(stdout); + } + } } -template -simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.unescaped_key(receiver, allow_replacement); +template +inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { + log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); } -simdjson_inline simdjson_result simdjson_result::value() noexcept { - if (error()) { return error(); } - return std::move(first.value()); +template +inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(level))) { + const int indent = depth * 2; + const auto buf = iter.token.buf; + auto msg = string_format(title, std::forward(args)...); + printf("| %*s%s%-*s ", indent, "", title_prefix, + LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); + { + // Print the current structural. + printf("| "); + // Before we begin, the index might point right before the document. + // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 + if (index < iter._root) { + printf("%*s", LOG_BUFFER_LEN, ""); + } else { + auto current_structural = &buf[*index]; + for (int i = 0; i < LOG_BUFFER_LEN; i++) { + printf("%c", printable_char(current_structural[i])); + } + } + printf(" "); + } + { + // Print the next structural. + printf("| "); + auto next_structural = &buf[*(index + 1)]; + for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { + printf("%c", printable_char(next_structural[i])); + } + printf(" "); + } + // printf("| %5u ", *(index+1)); + printf("| %5i ", depth); + printf("| %6.*s ", int(detail.size()), detail.data()); + printf("|\n"); + fflush(stdout); + } + } } +} // namespace logger +} // namespace ondemand +} // namespace westmere } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H -/* end file simdjson/generic/ondemand/field-inl.h for westmere */ -/* including simdjson/generic/ondemand/json_iterator-inl.h for westmere: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/json_iterator-inl.h for westmere */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +/* end file simdjson/generic/ondemand/logger-inl.h for westmere */ +/* including simdjson/generic/ondemand/object-inl.h for westmere: #include "simdjson/generic/ondemand/object-inl.h" */ +/* begin file simdjson/generic/ondemand/object-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/jsonpathutil.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_string_builder.h" // for constevalutil::fixed_string */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { namespace ondemand { -simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept - : token(std::forward(other.token)), - parser{other.parser}, - _string_buf_loc{other._string_buf_loc}, - error{other.error}, - _depth{other._depth}, - _root{other._root}, - _streaming{other._streaming} -{ - other.parser = nullptr; -} -simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { - token = other.token; - parser = other.parser; - _string_buf_loc = other._string_buf_loc; - error = other.error; - _depth = other._depth; - _root = other._root; - _streaming = other._streaming; - other.parser = nullptr; - return *this; +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); } - -simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept - : token(buf, &_parser->implementation->structural_indexes[0]), - parser{_parser}, - _string_buf_loc{parser->string_buf.get()}, - _depth{1}, - _root{parser->implementation->structural_indexes.get()}, - _streaming{false} - -{ - logger::log_headers(); -#if SIMDJSON_CHECK_EOF - assert_more_tokens(); -#endif +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); } - -#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept - : token(buf, &_parser->implementation->structural_indexes[0]), - parser{_parser}, - _string_buf_loc{parser->string_buf.get()}, - _depth{1}, - _root{parser->implementation->structural_indexes.get()}, - _streaming{streaming} - -{ - logger::log_headers(); -#if SIMDJSON_CHECK_EOF - assert_more_tokens(); -#endif +simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { + return find_field_unordered(key); } -#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON - -inline void json_iterator::rewind() noexcept { - token.set_position( root_position() ); - logger::log_headers(); // We start again - _string_buf_loc = parser->string_buf.get(); - _depth = 1; +simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { + return std::forward(*this).find_field_unordered(key); } - -inline bool json_iterator::balanced() const noexcept { - token_iterator ti(token); - int32_t count{0}; - ti.set_position( root_position() ); - while(ti.peek() <= peek_last()) { - switch (*ti.return_current_and_advance()) - { - case '[': case '{': - count++; - break; - case ']': case '}': - count--; - break; - default: - break; - } +simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; } - return count == 0; + return value(iter.child()); } - - -// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller -// relating depth and parent_depth, which is a desired effect. The warning does not show up if the -// skip_child() function is not marked inline). -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { - if (depth() <= parent_depth) { return SUCCESS; } - switch (*return_current_and_advance()) { - // TODO consider whether matching braces is a requirement: if non-matching braces indicates - // *missing* braces, then future lookups are not in the object/arrays they think they are, - // violating the rule "validate enough structure that the user can be confident they are - // looking at the right values." - // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth - - // For the first open array/object in a value, we've already incremented depth, so keep it the same - // We never stop at colon, but if we did, it wouldn't affect depth - case '[': case '{': case ':': - logger::log_start_value(*this, "skip"); - break; - // If there is a comma, we have just finished a value in an array/object, and need to get back in - case ',': - logger::log_value(*this, "skip"); - break; - // ] or } means we just finished a value and need to jump out of the array/object - case ']': case '}': - logger::log_end_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } -#if SIMDJSON_CHECK_EOF - // If there are no more tokens, the parent is incomplete. - if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } -#endif // SIMDJSON_CHECK_EOF - break; - case '"': - if(*peek() == ':') { - // We are at a key!!! - // This might happen if you just started an object and you skip it immediately. - // Performance note: it would be nice to get rid of this check as it is somewhat - // expensive. - // https://github.com/simdjson/simdjson/issues/1742 - logger::log_value(*this, "key"); - return_current_and_advance(); // eat up the ':' - break; // important!!! - } - simdjson_fallthrough; - // Anything else must be a scalar value - default: - // For the first scalar, we will have incremented depth already, so we decrement it here. - logger::log_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } - break; - } - - // Now that we've considered the first value, we only increment/decrement for arrays/objects - while (position() < end_position()) { - switch (*return_current_and_advance()) { - case '[': case '{': - logger::log_start_value(*this, "skip"); - _depth++; - break; - // TODO consider whether matching braces is a requirement: if non-matching braces indicates - // *missing* braces, then future lookups are not in the object/arrays they think they are, - // violating the rule "validate enough structure that the user can be confident they are - // looking at the right values." - // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth - case ']': case '}': - logger::log_end_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } - break; - default: - logger::log_value(*this, "skip", ""); - break; - } +simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; } - - return report_error(TAPE_ERROR, "not enough close braces"); + return value(iter.child()); } -SIMDJSON_POP_DISABLE_WARNINGS - -simdjson_inline bool json_iterator::at_root() const noexcept { - return position() == root_position(); +simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_object().error() ); + return object(iter); } - -simdjson_inline bool json_iterator::is_single_token() const noexcept { - return parser->implementation->n_structural_indexes == 1; +simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_root_object().error() ); + return object(iter); } - -simdjson_inline bool json_iterator::streaming() const noexcept { - return _streaming; +simdjson_warn_unused simdjson_inline error_code object::consume() noexcept { + if(iter.is_at_key()) { + /** + * whenever you are pointing at a key, calling skip_child() is + * unsafe because you will hit a string and you will assume that + * it is string value, and this mistake will lead you to make bad + * depth computation. + */ + /** + * We want to 'consume' the key. We could really + * just do _json_iter->return_current_and_advance(); at this + * point, but, for clarity, we will use the high-level API to + * eat the key. We assume that the compiler optimizes away + * most of the work. + */ + simdjson_unused raw_json_string actual_key; + auto error = iter.field_key().get(actual_key); + if (error) { iter.abandon(); return error; }; + // Let us move to the value while we are at it. + if ((error = iter.field_value())) { iter.abandon(); return error; } + } + auto error_skip = iter.json_iter().skip_child(iter.depth()-1); + if(error_skip) { iter.abandon(); } + return error_skip; } -simdjson_inline token_position json_iterator::root_position() const noexcept { - return _root; +simdjson_inline simdjson_result object::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + const uint8_t * final_point{iter._json_iter->peek()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } -simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { - SIMDJSON_ASSUME( _depth == 1 ); +simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.started_object().error() ); + return object(iter); } -simdjson_inline void json_iterator::assert_at_root() const noexcept { - SIMDJSON_ASSUME( _depth == 1 ); -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument - // has side effects that will be discarded. - SIMDJSON_ASSUME( token.position() == _root ); -#endif +simdjson_inline object object::resume(const value_iterator &iter) noexcept { + return iter; } -simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { - assert_valid_position(token._position + required_tokens - 1); +simdjson_inline object::object(const value_iterator &_iter) noexcept + : iter{_iter} +{ } -simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); - SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +simdjson_inline simdjson_result object::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } #endif + return object_iterator(iter); } - -simdjson_inline bool json_iterator::at_end() const noexcept { - return position() == end_position(); +simdjson_inline simdjson_result object::end() noexcept { + return object_iterator(iter); } -simdjson_inline token_position json_iterator::end_position() const noexcept { - uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; - return &parser->implementation->structural_indexes[n_structural_indexes]; + +inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result child; + + // If there is an escape character in the key, unescape it and then get the child. + size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = find_field(unescaped); // Take note find_field does not unescape keys when matching + } else { + child = find_field(key); + } + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; } -inline std::string json_iterator::to_string() const noexcept { - if( !is_alive() ) { return "dead json_iterator instance"; } - const char * current_structural = reinterpret_cast(token.peek()); - return std::string("json_iterator [ depth : ") + std::to_string(_depth) - + std::string(", structural : '") + std::string(current_structural,1) - + std::string("', offset : ") + std::to_string(token.current_offset()) - + std::string("', error : ") + error_message(error) - + std::string(" ]"); +inline simdjson_result object::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { + return INVALID_JSON_POINTER; + } + return at_pointer(json_pointer); } -inline simdjson_result json_iterator::current_location() const noexcept { - if (!is_alive()) { // Unrecoverable error - if (!at_root()) { - return reinterpret_cast(token.peek(-1)); +inline simdjson_result> object::at_path_with_wildcard(std::string_view json_path) noexcept { + std::vector result; + + auto result_pair = get_next_key_and_json_path(json_path); + std::string_view key = result_pair.first; + std::string_view remaining_path = result_pair.second; + // Handle when its the case for wildcard + if (key == "*") { + // Loop through each field in the object + for (auto field : *this) { + value val; + SIMDJSON_TRY(field.value().get(val)); + + if (remaining_path.empty()) { + result.push_back(std::move(val)); + } else { + auto nested_result = val.at_path_with_wildcard(remaining_path); + + if (nested_result.error()) { + return nested_result.error(); + } + // Extract and append all nested matches to our result + std::vector nested_vec; + SIMDJSON_TRY(std::move(nested_result).get(nested_vec)); + + result.insert(result.end(), + std::make_move_iterator(nested_vec.begin()), + std::make_move_iterator(nested_vec.end())); + } + } + return result; + } else { + value val; + SIMDJSON_TRY(find_field(key).get(val)); + + if (remaining_path.empty()) { + result.push_back(std::move(val)); + return result; } else { - return reinterpret_cast(token.peek()); + return val.at_path_with_wildcard(remaining_path); } } - if (at_end()) { - return OUT_OF_BOUNDS; - } - return reinterpret_cast(token.peek()); } -simdjson_inline bool json_iterator::is_alive() const noexcept { - return parser; +simdjson_inline simdjson_result object::count_fields() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the object after counting the number of elements. + iter.reset_object(); + return count; } -simdjson_inline void json_iterator::abandon() noexcept { - parser = nullptr; - _depth = 0; +simdjson_inline simdjson_result object::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_object().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; } -simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { -#if SIMDJSON_CHECK_EOF - assert_more_tokens(); -#endif // SIMDJSON_CHECK_EOF - return token.return_current_and_advance(); +simdjson_inline simdjson_result object::reset() & noexcept { + return iter.reset_object(); } -simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { - // deliberately done without safety guard: - return token.peek(); -} +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION -simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_more_tokens(delta+1); -#endif // SIMDJSON_CHECK_EOF - return token.peek(delta); -} +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code object::extract_into(T& out) & noexcept { + // Helper to check if a field name matches any of the requested fields + auto should_extract = [](std::string_view field_name) constexpr -> bool { + return ((FieldNames.view() == field_name) || ...); + }; -simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_more_tokens(delta+1); -#endif // #if SIMDJSON_CHECK_EOF - return token.peek_length(delta); -} + // Iterate through all members of T using reflection + template for (constexpr auto mem : std::define_static_array( + std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + + // Only extract this field if it's in our list of requested fields + if constexpr (should_extract(key)) { + // Try to find and extract the field + if constexpr (concepts::optional_type) { + // For optional fields, it's ok if they're missing + auto field_result = find_field_unordered(key); + if (!field_result.error()) { + auto error = field_result.get(out.[:mem:]); + if (error && error != NO_SUCH_FIELD) { + return error; + } + } else if (field_result.error() != NO_SUCH_FIELD) { + return field_result.error(); + } else { + out.[:mem:].reset(); + } + } else { + // For required fields (in the requested list), fail if missing + SIMDJSON_TRY((*this)[key].get(out.[:mem:])); + } + } + } + }; -simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { - // todo: currently we require end-of-string buffering, but the following - // assert_valid_position should be turned on if/when we lift that condition. - // assert_valid_position(position); - // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF - // is ON by default, we have no choice but to disable it for real with a comment. - return token.peek(position); + return SUCCESS; } -simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_valid_position(position); -#endif // SIMDJSON_CHECK_EOF - return token.peek_length(position); +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); } -simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_valid_position(position); -#endif // SIMDJSON_CHECK_EOF - return token.peek_root_length(position); +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); } - -simdjson_inline token_position json_iterator::last_position() const noexcept { - // The following line fails under some compilers... - // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); - // since it has side-effects. - uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; - SIMDJSON_ASSUME(n_structural_indexes > 0); - return &parser->implementation->structural_indexes[n_structural_indexes - 1]; +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); } -simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { - return token.peek(last_position()); +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field_unordered(key); } - -simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { - SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); - SIMDJSON_ASSUME(_depth == parent_depth + 1); - _depth = parent_depth; +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; } - -simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { - SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); - SIMDJSON_ASSUME(_depth == child_depth - 1); - _depth = child_depth; +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first)[key]; } - -simdjson_inline depth_t json_iterator::depth() const noexcept { - return _depth; +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field(key); } -simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { - return _string_buf_loc; +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); } -simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { - SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); - logger::log_error(*this, message); - error = _error; - return error; +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); } -simdjson_inline token_position json_iterator::position() const noexcept { - return token.position(); +simdjson_inline simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path_with_wildcard(json_path); } -simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { -#if SIMDJSON_DEVELOPMENT_CHECKS - auto result = parser->unescape(in, _string_buf_loc, allow_replacement); - SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); - return result; -#else - return parser->unescape(in, _string_buf_loc, allow_replacement); -#endif +inline simdjson_result simdjson_result::reset() noexcept { + if (error()) { return error(); } + return first.reset(); } -simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { -#if SIMDJSON_DEVELOPMENT_CHECKS - auto result = parser->unescape_wobbly(in, _string_buf_loc); - SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); - return result; -#else - return parser->unescape_wobbly(in, _string_buf_loc); -#endif +inline simdjson_result simdjson_result::is_empty() noexcept { + if (error()) { return error(); } + return first.is_empty(); } -simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { - SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); - SIMDJSON_ASSUME(_depth == child_depth - 1); -#if SIMDJSON_DEVELOPMENT_CHECKS -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); - SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); -#endif -#endif - token.set_position(position); - _depth = child_depth; +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); } -simdjson_inline error_code json_iterator::consume_character(char c) noexcept { - if (*peek() == c) { - return_current_and_advance(); - return SUCCESS; - } - return TAPE_ERROR; +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); } -#if SIMDJSON_DEVELOPMENT_CHECKS +} // namespace simdjson -simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { - SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); - return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; -} +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +/* end file simdjson/generic/ondemand/object-inl.h for westmere */ +/* including simdjson/generic/ondemand/object_iterator-inl.h for westmere: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/object_iterator-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H -simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { - SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); - if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } -} +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#endif +namespace simdjson { +namespace westmere { +namespace ondemand { +// +// object_iterator +// -simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { - SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); - logger::log_error(*this, message); - return _error; +simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result object_iterator::operator*() noexcept { + error_code error = iter.error(); + if (error) { iter.abandon(); return error; } + auto result = field::start(iter); + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (result.error()) { iter.abandon(); } + return result; +} +simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { + return iter.is_open(); } +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline object_iterator &object_iterator::operator++() noexcept { + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error -simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { - // This function is not expected to be called in performance-sensitive settings. - // Let us guard against silly cases: - if((N < max_len) || (N == 0)) { return false; } - // Copy to the buffer. - std::memcpy(tmpbuf, json, max_len); - if(N > max_len) { // We pad whatever remains with ' '. - std::memset(tmpbuf + max_len, ' ', N - max_len); - } - return true; + simdjson_unused error_code error; + if ((error = iter.skip_child() )) { return *this; } + + simdjson_unused bool has_value; + if ((error = iter.has_next_field().get(has_value) )) { return *this; }; + return *this; } +SIMDJSON_POP_DISABLE_WARNINGS + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the object is first found and the iterator is just past the {. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the , or } before the next value. In this state, +// depth == iter.depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter.depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the object iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an +// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter.depth == depth, and at_start == false. +// +// Errors that occur while reading a field to give to the user (such as when the key is not a +// string or the field is missing a colon) are yielded immediately. Depth is then decremented, +// moving to the Finished state without transitioning through an Error state at all. +// +// ## Terminal State +// +// The terminal state has iter.depth < depth. at_start is always false. +// +// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. +// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. +// } // namespace ondemand } // namespace westmere @@ -97803,1516 +119409,1923 @@ simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const ui namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::json_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result( + westmere::ondemand::object_iterator &&value +) noexcept + : implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +// Checks for ']' and ',' +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++first; + return *this; +} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/json_iterator-inl.h for westmere */ -/* including simdjson/generic/ondemand/json_type-inl.h for westmere: #include "simdjson/generic/ondemand/json_type-inl.h" */ -/* begin file simdjson/generic/ondemand/json_type-inl.h for westmere */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/object_iterator-inl.h for westmere */ +/* including simdjson/generic/ondemand/parser-inl.h for westmere: #include "simdjson/generic/ondemand/parser-inl.h" */ +/* begin file simdjson/generic/ondemand/parser-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { namespace ondemand { -inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { - switch (type) { - case json_type::array: out << "array"; break; - case json_type::object: out << "object"; break; - case json_type::number: out << "number"; break; - case json_type::string: out << "string"; break; - case json_type::boolean: out << "boolean"; break; - case json_type::null: out << "null"; break; - default: SIMDJSON_UNREACHABLE(); - } - return out; +simdjson_inline parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity} { } -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { - return out << type.value(); +simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { + if (new_capacity > max_capacity()) { return CAPACITY; } + if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + + // string_capacity copied from document::allocate + _capacity = 0; + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); +#if SIMDJSON_DEVELOPMENT_CHECKS + start_positions.reset(new (std::nothrow) token_position[new_max_depth]); +#endif + if (implementation) { + SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); + SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + } else { + SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); + } + _capacity = new_capacity; + _max_depth = new_max_depth; + return SUCCESS; +} +#if SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { + return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); } #endif +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } + json.remove_utf8_bom(); -simdjson_inline number_type number::get_number_type() const noexcept { - return type; + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return document::start({ reinterpret_cast(json.data()), this }); } -simdjson_inline bool number::is_uint64() const noexcept { - return get_number_type() == number_type::unsigned_integer; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); + if (err) { + if (err != UNCLOSED_STRING) + return err; + } + return document::start({ reinterpret_cast(json.data()), this, true }); } +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -simdjson_inline uint64_t number::get_uint64() const noexcept { - return payload.unsigned_integer; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); } -simdjson_inline number::operator uint64_t() const noexcept { - return get_uint64(); +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); } -simdjson_inline bool number::is_int64() const noexcept { - return get_number_type() == number_type::signed_integer; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { + return iterate(padded_string_view(json, allocated)); } -simdjson_inline int64_t number::get_int64() const noexcept { - return payload.signed_integer; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { + return iterate(pad_with_reserve(json)); } -simdjson_inline number::operator int64_t() const noexcept { - return get_int64(); +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { + return iterate(padded_string_view(json)); } -simdjson_inline bool number::is_double() const noexcept { - return get_number_type() == number_type::floating_point_number; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + padded_string_view json = result.value_unsafe(); + return iterate(json); } -simdjson_inline double number::get_double() const noexcept { - return payload.floating_point_number; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + const padded_string &json = result.value_unsafe(); + return iterate(json); } -simdjson_inline number::operator double() const noexcept { - return get_double(); +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length()) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return json_iterator(reinterpret_cast(json.data()), this); } -simdjson_inline double number::as_double() const noexcept { - if(is_double()) { - return payload.floating_point_number; +inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { + buf += 3; + len -= 3; } - if(is_int64()) { - return double(payload.signed_integer); + if(allow_comma_separated && batch_size < len) { batch_size = len; } + return document_stream(*this, buf, len, batch_size, allow_comma_separated); +} + +inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. + return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(padded_string_view s, size_t batch_size, bool allow_comma_separated) noexcept { + if (!s.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(pad(s), batch_size, allow_comma_separated); +} +simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { + return _capacity; +} +simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; +} +simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { + return _max_depth; +} + +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; } - return double(payload.unsigned_integer); } -simdjson_inline void number::append_s64(int64_t value) noexcept { - payload.signed_integer = value; - type = number_type::signed_integer; +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { + uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; } -simdjson_inline void number::append_u64(uint64_t value) noexcept { - payload.unsigned_integer = value; - type = number_type::unsigned_integer; +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { + uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; } -simdjson_inline void number::append_double(double value) noexcept { - payload.floating_point_number = value; - type = number_type::floating_point_number; +simdjson_inline simdjson_warn_unused ondemand::parser& parser::get_parser() { + return *parser::get_parser_instance(); } -simdjson_inline void number::skip_double() noexcept { - type = number_type::floating_point_number; +simdjson_inline bool release_parser() { + auto &parser_instance = parser::get_threadlocal_parser_if_exists(); + if (parser_instance) { + parser_instance.reset(); + return true; + } + return false; +} + +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_parser_instance() { + std::unique_ptr& parser_instance = get_threadlocal_parser_if_exists(); + if (!parser_instance) { + parser_instance.reset(new ondemand::parser()); + } + return parser_instance; } +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_threadlocal_parser_if_exists() { + // @the-moisrex points out that this could be implemented with std::optional (C++17). + thread_local std::unique_ptr parser_instance = nullptr; + return parser_instance; +} + + } // namespace ondemand } // namespace westmere } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::json_type &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H -/* end file simdjson/generic/ondemand/json_type-inl.h for westmere */ -/* including simdjson/generic/ondemand/logger-inl.h for westmere: #include "simdjson/generic/ondemand/logger-inl.h" */ -/* begin file simdjson/generic/ondemand/logger-inl.h for westmere */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H +/* end file simdjson/generic/ondemand/parser-inl.h for westmere */ +/* including simdjson/generic/ondemand/raw_json_string-inl.h for westmere: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include -#include - namespace simdjson { + namespace westmere { namespace ondemand { -namespace logger { - -static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; -static constexpr const int LOG_EVENT_LEN = 20; -static constexpr const int LOG_BUFFER_LEN = 30; -static constexpr const int LOG_SMALL_BUFFER_LEN = 10; -static int log_depth = 0; // Not threadsafe. Log only. - -// Helper to turn unprintable or newline characters into spaces -static inline char printable_char(char c) { - if (c >= 0x20) { - return c; - } else { - return ' '; - } -} - -template -static inline std::string string_format(const std::string& format, const Args&... args) -{ - SIMDJSON_PUSH_DISABLE_ALL_WARNINGS - int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; - auto size = static_cast(size_s); - if (size <= 0) return std::string(); - std::unique_ptr buf(new char[size]); - std::snprintf(buf.get(), size, format.c_str(), args...); - SIMDJSON_POP_DISABLE_WARNINGS - return std::string(buf.get(), buf.get() + size - 1); -} - -static inline log_level get_log_level_from_env() -{ - SIMDJSON_PUSH_DISABLE_WARNINGS - SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe - char *lvl = getenv("SIMDJSON_LOG_LEVEL"); - SIMDJSON_POP_DISABLE_WARNINGS - if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } - return log_level::info; -} -static inline log_level log_threshold() -{ - static log_level threshold = get_log_level_from_env(); - return threshold; -} - -static inline bool should_log(log_level level) -{ - return level >= log_threshold(); -} +simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} -inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +simdjson_inline const char * raw_json_string::raw() const noexcept { + return reinterpret_cast(buf); } -inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { - log_line(iter, index, depth, "", type, detail, log_level::info); -} -inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +simdjson_inline char raw_json_string::operator[](size_t i) const noexcept { + return reinterpret_cast(buf)[i]; } -inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { - log_line(iter, index, depth, "+", type, detail, log_level::info); - if (LOG_ENABLED) { log_depth++; } -} -inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_line(iter, "+", type, "", delta, depth_delta, log_level::info); - if (LOG_ENABLED) { log_depth++; } +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { + size_t pos{0}; + while(pos < target.size()) { + pos = target.find('"', pos); + if(pos == std::string_view::npos) { return true; } + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } + } + pos++; + } + return true; } -inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - if (LOG_ENABLED) { log_depth--; } - log_line(iter, "-", type, "", delta, depth_delta, log_level::info); +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { + size_t pos{0}; + while(target[pos]) { + const char * result = strchr(target+pos, '"'); + if(result == nullptr) { return true; } + pos = result - target; + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } + } + pos++; + } + return true; } -inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { - log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); -} -inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { - log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); -} -inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_event(iter.json_iter(), type, detail, delta, depth_delta); +simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { + // If we are going to call memcmp, then we must know something about the length of the raw_json_string. + return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); } -inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_value(iter.json_iter(), type, detail, delta, depth_delta); +simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { + // Assumptions: does not contain unescaped quote characters("), and + // the raw content is quote terminated within a valid JSON string. + if(target.size() <= SIMDJSON_PADDING) { + return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + } + const char * r{raw()}; + size_t pos{0}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; } -inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_start_value(iter.json_iter(), type, delta, depth_delta); +simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; } -inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_end_value(iter.json_iter(), type, delta, depth_delta); -} -inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { - log_error(iter.json_iter(), error, detail, delta, depth_delta); +simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { + // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; } -inline void log_headers() noexcept { - if (LOG_ENABLED) { - if (simdjson_unlikely(should_log(log_level::info))) { - // Technically a static variable is not thread-safe, but if you are using threads and logging... well... - static bool displayed_hint{false}; - log_depth = 0; - printf("\n"); - if (!displayed_hint) { - // We only print this helpful header once. - printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); - printf("# +array says 'this is where we were when we discovered the start array'\n"); - printf( - "# -array says 'this is where we were when we ended the array'\n"); - printf("# skip says 'this is a structural or value I am skipping'\n"); - printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); - printf("#\n"); - printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); - printf("# in addition to the depth being displayed.\n"); - printf("#\n"); - printf("# Every token in the document has a single depth determined by the tokens before it,\n"); - printf("# and is not affected by what the token actually is.\n"); - printf("#\n"); - printf("# Not all structural elements are presented as tokens in the logs.\n"); - printf("#\n"); - printf("# We never give control to the user within an empty array or an empty object.\n"); - printf("#\n"); - printf("# Inside an array, having a depth greater than the array's depth means that\n"); - printf("# we are pointing inside a value.\n"); - printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); - printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); - displayed_hint = true; - } - printf("\n"); - printf("| %-*s ", LOG_EVENT_LEN, "Event"); - printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); - printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); - // printf("| %-*s ", 5, "Next#"); - printf("| %-*s ", 5, "Depth"); - printf("| Detail "); - printf("|\n"); - - printf("|%.*s", LOG_EVENT_LEN + 2, DASHES); - printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); - printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); - // printf("|%.*s", 5+2, DASHES); - printf("|%.*s", 5 + 2, DASHES); - printf("|--------"); - printf("|\n"); - fflush(stdout); +simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; } } + if(r[pos] != '"') { return false; } + return true; } -template -inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { - log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); -} - -template -inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept { - if (LOG_ENABLED) { - if (simdjson_unlikely(should_log(level))) { - const int indent = depth * 2; - const auto buf = iter.token.buf; - auto msg = string_format(title, std::forward(args)...); - printf("| %*s%s%-*s ", indent, "", title_prefix, - LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); - { - // Print the current structural. - printf("| "); - // Before we begin, the index might point right before the document. - // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 - if (index < iter._root) { - printf("%*s", LOG_BUFFER_LEN, ""); - } else { - auto current_structural = &buf[*index]; - for (int i = 0; i < LOG_BUFFER_LEN; i++) { - printf("%c", printable_char(current_structural[i])); - } - } - printf(" "); - } - { - // Print the next structural. - printf("| "); - auto next_structural = &buf[*(index + 1)]; - for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { - printf("%c", printable_char(next_structural[i])); - } - printf(" "); - } - // printf("| %5u ", *(index+1)); - printf("| %5i ", depth); - printf("| %6.*s ", int(detail.size()), detail.data()); - printf("|\n"); - fflush(stdout); +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { + return a.unsafe_is_equal(c); +} + +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { + return a == c; +} + +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { + return !(a == c); +} + +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { + return !(a == c); +} + + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { + return iter.unescape(*this, allow_replacement); +} + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { + return iter.unescape_wobbly(*this); +} + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { + bool in_escape = false; + const char *s = str.raw(); + while (true) { + switch (*s) { + case '\\': in_escape = !in_escape; break; + case '"': if (in_escape) { in_escape = false; } else { return out; } break; + default: if (in_escape) { in_escape = false; } } + out << *s; + s++; } } -} // namespace logger } // namespace ondemand } // namespace westmere } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H -/* end file simdjson/generic/ondemand/logger-inl.h for westmere */ -/* including simdjson/generic/ondemand/object-inl.h for westmere: #include "simdjson/generic/ondemand/object-inl.h" */ -/* begin file simdjson/generic/ondemand/object-inl.h for westmere */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::raw() const noexcept { + if (error()) { return error(); } + return first.raw(); +} +simdjson_inline char simdjson_result::operator[](size_t i) const noexcept { + if (error()) { return error(); } + return first[i]; +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(westmere::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { + if (error()) { return error(); } + return first.unescape(iter, allow_replacement); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(westmere::ondemand::json_iterator &iter) const noexcept { + if (error()) { return error(); } + return first.unescape_wobbly(iter); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H +/* end file simdjson/generic/ondemand/raw_json_string-inl.h for westmere */ +/* including simdjson/generic/ondemand/serialization-inl.h for westmere: #include "simdjson/generic/ondemand/serialization-inl.h" */ +/* begin file simdjson/generic/ondemand/serialization-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_builder.h" */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace westmere { -namespace ondemand { -simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; - } - return value(iter.child()); -} -simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; - } - return value(iter.child()); -} -simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { - return find_field_unordered(key); +inline std::string_view trim(const std::string_view str) noexcept { + // We can almost surely do better by rolling our own find_first_not_of function. + size_t first = str.find_first_not_of(" \t\n\r"); + // If we have the empty string (just white space), then no trimming is possible, and + // we return the empty string_view. + if (std::string_view::npos == first) { return std::string_view(); } + size_t last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, (last - first + 1)); } -simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { - return std::forward(*this).find_field_unordered(key); + + +inline simdjson_result to_json_string(westmere::ondemand::document& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; - } - return value(iter.child()); + +inline simdjson_result to_json_string(westmere::ondemand::document_reference& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; + +inline simdjson_result to_json_string(westmere::ondemand::value& x) noexcept { + /** + * If we somehow receive a value that has already been consumed, + * then the following code could be in trouble. E.g., we create + * an array as needed, but if an array was already created, then + * it could be bad. + */ + using namespace westmere::ondemand; + westmere::ondemand::json_type t; + auto error = x.type().get(t); + if(error != SUCCESS) { return error; } + switch (t) + { + case json_type::array: + { + westmere::ondemand::array array; + error = x.get_array().get(array); + if(error) { return error; } + return to_json_string(array); + } + case json_type::object: + { + westmere::ondemand::object object; + error = x.get_object().get(object); + if(error) { return error; } + return to_json_string(object); + } + default: + return trim(x.raw_json_token()); } - return value(iter.child()); } -simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { - SIMDJSON_TRY( iter.start_object().error() ); - return object(iter); +inline simdjson_result to_json_string(westmere::ondemand::object& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { - SIMDJSON_TRY( iter.start_root_object().error() ); - return object(iter); + +inline simdjson_result to_json_string(westmere::ondemand::array& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline error_code object::consume() noexcept { - if(iter.is_at_key()) { - /** - * whenever you are pointing at a key, calling skip_child() is - * unsafe because you will hit a string and you will assume that - * it is string value, and this mistake will lead you to make bad - * depth computation. - */ - /** - * We want to 'consume' the key. We could really - * just do _json_iter->return_current_and_advance(); at this - * point, but, for clarity, we will use the high-level API to - * eat the key. We assume that the compiler optimizes away - * most of the work. - */ - simdjson_unused raw_json_string actual_key; - auto error = iter.field_key().get(actual_key); - if (error) { iter.abandon(); return error; }; - // Let us move to the value while we are at it. - if ((error = iter.field_value())) { iter.abandon(); return error; } - } - auto error_skip = iter.json_iter().skip_child(iter.depth()-1); - if(error_skip) { iter.abandon(); } - return error_skip; + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline simdjson_result object::raw_json() noexcept { - const uint8_t * starting_point{iter.peek_start()}; - auto error = consume(); - if(error) { return error; } - const uint8_t * final_point{iter._json_iter->peek()}; - return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { - SIMDJSON_TRY( iter.started_object().error() ); - return object(iter); +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline object object::resume(const value_iterator &iter) noexcept { - return iter; +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline object::object(const value_iterator &_iter) noexcept - : iter{_iter} -{ +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } +} // namespace simdjson -simdjson_inline simdjson_result object::begin() noexcept { -#if SIMDJSON_DEVELOPMENT_CHECKS - if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } -#endif - return object_iterator(iter); +namespace simdjson { namespace westmere { namespace ondemand { + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } } -simdjson_inline simdjson_result object::end() noexcept { - return object_iterator(iter); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } } +#endif -inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { - if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } - json_pointer = json_pointer.substr(1); - size_t slash = json_pointer.find('/'); - std::string_view key = json_pointer.substr(0, slash); - // Grab the child with the given key - simdjson_result child; +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif - // If there is an escape character in the key, unescape it and then get the child. - size_t escape = key.find('~'); - if (escape != std::string_view::npos) { - // Unescape the key - std::string unescaped(key); - do { - switch (unescaped[escape+1]) { - case '0': - unescaped.replace(escape, 2, "~"); - break; - case '1': - unescaped.replace(escape, 2, "/"); - break; - default: - return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); - } - escape = unescaped.find('~', escape+1); - } while (escape != std::string::npos); - child = find_field(unescaped); // Take note find_field does not unescape keys when matching +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); } else { - child = find_field(key); + throw simdjson::simdjson_error(error); } - if(child.error()) { - return child; // we do not continue if there was an error +} +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::document_reference& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); } - // If there is a /, we have to recurse and look up more of the path - if (slash != std::string_view::npos) { - child = child.at_pointer(json_pointer.substr(slash)); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); } - return child; } +#endif -inline simdjson_result object::at_path(std::string_view json_path) noexcept { - auto json_pointer = json_path_to_pointer_conversion(json_path); - if (json_pointer == "-1") { - return INVALID_JSON_POINTER; +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); } - return at_pointer(json_pointer); } - -simdjson_inline simdjson_result object::count_fields() & noexcept { - size_t count{0}; - // Important: we do not consume any of the values. - for(simdjson_unused auto v : *this) { count++; } - // The above loop will always succeed, but we want to report errors. - if(iter.error()) { return iter.error(); } - // We need to move back at the start because we expect users to iterate through - // the object after counting the number of elements. - iter.reset_object(); - return count; +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); } - -simdjson_inline simdjson_result object::is_empty() & noexcept { - bool is_not_empty; - auto error = iter.reset_object().get(is_not_empty); - if(error) { return error; } - return !is_not_empty; +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } } +#endif +}}} // namespace simdjson::westmere::ondemand -simdjson_inline simdjson_result object::reset() & noexcept { - return iter.reset_object(); -} +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +/* end file simdjson/generic/ondemand/serialization-inl.h for westmere */ +/* including simdjson/generic/ondemand/token_iterator-inl.h for westmere: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/token_iterator-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H -} // namespace ondemand -} // namespace westmere -} // namespace simdjson +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { +namespace westmere { +namespace ondemand { -simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::object &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline token_iterator::token_iterator( + const uint8_t *_buf, + token_position position +) noexcept : buf{_buf}, _position{position} +{ +} -simdjson_inline simdjson_result simdjson_result::begin() noexcept { - if (error()) { return error(); } - return first.begin(); +simdjson_inline uint32_t token_iterator::current_offset() const noexcept { + return *(_position); } -simdjson_inline simdjson_result simdjson_result::end() noexcept { - if (error()) { return error(); } - return first.end(); + + +simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { + return &buf[*(_position++)]; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); + +simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { + return &buf[*position]; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first).find_field_unordered(key); +simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { + return *position; } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { - if (error()) { return error(); } - return first[key]; +simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { + return *(position+1) - *position; } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first)[key]; + +simdjson_inline uint32_t token_iterator::peek_root_length(token_position position) const noexcept { + return *(position+2) - *(position) > *(position+1) - *(position) ? + *(position+1) - *(position) + : *(position+2) - *(position); } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); +simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { + return &buf[*(_position+delta)]; } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first).find_field(key); +simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { + return *(_position+delta); +} +simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { + return *(_position+delta+1) - *(_position+delta); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); +simdjson_inline token_position token_iterator::position() const noexcept { + return _position; +} +simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { + _position = target_position; } -simdjson_inline simdjson_result simdjson_result::at_path( - std::string_view json_path) noexcept { - if (error()) { - return error(); - } - return first.at_path(json_path); +simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { + return _position == other._position; +} +simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { + return _position != other._position; +} +simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { + return _position > other._position; +} +simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { + return _position >= other._position; } - -inline simdjson_result simdjson_result::reset() noexcept { - if (error()) { return error(); } - return first.reset(); +simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { + return _position < other._position; } - -inline simdjson_result simdjson_result::is_empty() noexcept { - if (error()) { return error(); } - return first.is_empty(); +simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { + return _position <= other._position; } -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { - if (error()) { return error(); } - return first.count_fields(); -} +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} -simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { - if (error()) { return error(); } - return first.raw_json(); -} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H -/* end file simdjson/generic/ondemand/object-inl.h for westmere */ -/* including simdjson/generic/ondemand/object_iterator-inl.h for westmere: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/object_iterator-inl.h for westmere */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/token_iterator-inl.h for westmere */ +/* including simdjson/generic/ondemand/value_iterator-inl.h for westmere: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/value_iterator-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace westmere { namespace ondemand { -// -// object_iterator -// - -simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept - : iter{_iter} -{} +simdjson_inline value_iterator::value_iterator( + json_iterator *json_iter, + depth_t depth, + token_position start_position +) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} +{ +} -simdjson_inline simdjson_result object_iterator::operator*() noexcept { - error_code error = iter.error(); - if (error) { iter.abandon(); return error; } - auto result = field::start(iter); - // TODO this is a safety rail ... users should exit loops as soon as they receive an error. - // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. - if (result.error()) { iter.abandon(); } - return result; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_object(); } -simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { - return !(*this != other); + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_root_object(); } -simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { - return iter.is_open(); + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { + assert_at_container_start(); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + if (*_json_iter->peek() == '}') { + logger::log_value(*_json_iter, "empty object"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY(end_container()); + return false; + } + return true; } -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_inline object_iterator &object_iterator::operator++() noexcept { - // TODO this is a safety rail ... users should exit loops as soon as they receive an error. - // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. - if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // object: e.g., `{"a":2} foo }`. Users concerned with garbage content should + // call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != '}') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); + } + // If the last character is } *and* the first gibberish character is also '}' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed object. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} - simdjson_unused error_code error; - if ((error = iter.skip_child() )) { return *this; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { + auto error = check_root_object(); + if(error) { return error; } + return started_object(); +} - simdjson_unused bool has_value; - if ((error = iter.has_next_field().get(has_value) )) { return *this; }; - return *this; +simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { +#if SIMDJSON_CHECK_EOF + if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } + // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + _json_iter->ascend_to(depth()-1); + return SUCCESS; } -SIMDJSON_POP_DISABLE_WARNINGS -// -// ### Live States -// -// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is -// always SUCCESS: -// -// - Start: This is the state when the object is first found and the iterator is just past the {. -// In this state, at_start == true. -// - Next: After we hand a scalar value to the user, or an array/object which they then fully -// iterate over, the iterator is at the , or } before the next value. In this state, -// depth == iter.depth, at_start == false, and error == SUCCESS. -// - Unfinished Business: When we hand an array/object to the user which they do not fully -// iterate over, we need to finish that iteration by skipping child values until we reach the -// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. -// -// ## Error States -// -// In error states, we will yield exactly one more value before stopping. iter.depth == depth -// and at_start is always false. We decrement after yielding the error, moving to the Finished -// state. -// -// - Chained Error: When the object iterator is part of an error chain--for example, in -// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an -// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and -// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. -// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, -// we flag that as an error and treat it exactly the same as a Chained Error. In this state, -// error == TAPE_ERROR, iter.depth == depth, and at_start == false. -// -// Errors that occur while reading a field to give to the user (such as when the key is not a -// string or the field is missing a colon) are yielded immediately. Depth is then decremented, -// moving to the Finished state without transitioning through an Error state at all. -// -// ## Terminal State -// -// The terminal state has iter.depth < depth. at_start is always false. -// -// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. -// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. -// +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { + assert_at_next(); -} // namespace ondemand -} // namespace westmere -} // namespace simdjson + // It's illegal to call this unless there are more tokens: anything that ends in } or ] is + // obligated to verify there are more tokens if they are not the top level. + switch (*_json_iter->return_current_and_advance()) { + case '}': + logger::log_end_value(*_json_iter, "object"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between object fields"); + } +} -namespace simdjson { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { + error_code error; + bool has_value; + // + // Initially, the object can be in one of a few different places: + // + // 1. The start of the object, at the first field: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + if (at_first_field()) { + has_value = true; -simdjson_inline simdjson_result::simdjson_result( - westmere::ondemand::object_iterator &&value -) noexcept - : implementation_simdjson_result_base(std::forward(value)) -{ - first.iter.assert_is_valid(); -} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base({}, error) -{ -} + // + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + return false; -simdjson_inline simdjson_result simdjson_result::operator*() noexcept { - if (error()) { return error(); } - return *first; -} -// If we're iterating and there is an error, return the error once. -simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return !error(); } - return first == other.first; -} -// If we're iterating and there is an error, return the error once. -simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return error(); } - return first != other.first; -} -// Checks for ']' and ',' -simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { - // Clear the error if there is one, so we don't yield it twice - if (error()) { second = SUCCESS; return *this; } - ++first; - return *this; -} + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + if ((error = skip_child() )) { abandon(); return error; } + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + while (has_value) { + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + //if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } -} // namespace simdjson + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); // Skip the value entirely + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } -#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/object_iterator-inl.h for westmere */ -/* including simdjson/generic/ondemand/parser-inl.h for westmere: #include "simdjson/generic/ondemand/parser-inl.h" */ -/* begin file simdjson/generic/ondemand/parser-inl.h for westmere */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H + // If the loop ended, we're out of fields to look at. + return false; +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { + /** + * When find_field_unordered_raw is called, we can either be pointing at the + * first key, pointing outside (at the closing brace) or if a key was matched + * we can be either pointing right afterthe ':' right before the value (that we need skip), + * or we may have consumed the value and we might be at a comma or at the + * final brace (ready for a call to has_next_field()). + */ + error_code error; + bool has_value; -namespace simdjson { -namespace westmere { -namespace ondemand { + // First, we scan from that point to the end. + // If we don't find a match, we may loop back around, and scan from the beginning to that point. + token_position search_start = _json_iter->position(); -simdjson_inline parser::parser(size_t max_capacity) noexcept - : _max_capacity{max_capacity} { -} + // We want to know whether we need to go back to the beginning. + bool at_first = at_first_field(); + /////////////// + // Initially, the object can be in one of a few different places: + // + // 1. At the first key: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + // + if (at_first) { + has_value = true; -simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { - if (new_capacity > max_capacity()) { return CAPACITY; } - if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { - // string_capacity copied from document::allocate - _capacity = 0; - size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); - string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); #if SIMDJSON_DEVELOPMENT_CHECKS - start_positions.reset(new (std::nothrow) token_position[new_max_depth]); + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } #endif - if (implementation) { - SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); - SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + SIMDJSON_TRY(reset_object().get(has_value)); + at_first = true; + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // } else { - SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); - } - _capacity = new_capacity; - _max_depth = new_max_depth; - return SUCCESS; -} + // If someone queried a key but they not did access the value, then we are left pointing + // at the ':' and we need to move forward through the value... If the value was + // processed then skip_child() does not move the iterator (but may adjust the depth). + if ((error = skip_child() )) { abandon(); return error; } + search_start = _json_iter->position(); + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } #if SIMDJSON_DEVELOPMENT_CHECKS -simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { - return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); -} + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } #endif + } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + // After initial processing, we will be in one of two states: + // + // ``` + // // At the beginning of a field + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // At the end of the object + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // ``` + // + // Next, we find a match starting from the current position. + while (has_value) { + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + // Performance note: it maybe wasteful to rewind to the beginning when there might be + // no other query following. Indeed, it would require reskipping the whole object. + // Instead, you can just stay where you are. If there is a new query, there is always time + // to rewind. + if(at_first) { return false; } + + // If we reach the end without finding a match, search the rest of the fields starting at the + // beginning of the object. + // (We have already run through the object before, so we've already validated its structure. We + // don't check errors in this bit.) + SIMDJSON_TRY(reset_object().get(has_value)); + while (true) { + SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + error = field_value(); SIMDJSON_ASSUME(!error); - json.remove_utf8_bom(); + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } - // Allocate if needed - if (capacity() < json.length() || !string_buf) { - SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // If we reached the end of the key-value pair we started from, then we know + // that the key is not there so we return false. We are either right before + // the next comma or the final brace. + if(_json_iter->position() == search_start) { return false; } + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); + // If we make the mistake of exiting here, then we could be left pointing at a key + // in the middle of an object. That's not an allowable state. } - - // Run stage 1. - SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); - return document::start({ reinterpret_cast(json.data()), this }); + // If the loop ended, we're out of fields to look at. The program should + // never reach this point. + return false; } +SIMDJSON_POP_DISABLE_WARNINGS -#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { + assert_at_next(); - json.remove_utf8_bom(); + const uint8_t *key = _json_iter->return_current_and_advance(); + if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } + return raw_json_string(key); +} - // Allocate if needed - if (capacity() < json.length() || !string_buf) { - SIMDJSON_TRY( allocate(json.length(), max_depth()) ); - } +simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { + assert_at_next(); - // Run stage 1. - const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); - if (err) { - if (err != UNCLOSED_STRING) - return err; - } - return document::start({ reinterpret_cast(json.data()), this, true }); + if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } + _json_iter->descend_to(depth()+1); + return SUCCESS; } -#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { - return iterate(padded_string_view(json, len, allocated)); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_array(); } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { - return iterate(padded_string_view(json, len, allocated)); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_root_array(); } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { - return iterate(padded_string_view(json, allocated)); +inline std::string value_iterator::to_string() const noexcept { + auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); + if(_json_iter != nullptr) { answer += _json_iter->to_string(); } + answer += std::string(" ]"); + return answer; } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { - if(json.capacity() - json.size() < SIMDJSON_PADDING) { - json.reserve(json.size() + SIMDJSON_PADDING); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { + assert_at_container_start(); + if (*_json_iter->peek() == ']') { + logger::log_value(*_json_iter, "empty array"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY( end_container() ); + return false; } - return iterate(padded_string_view(json)); -} - -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { - return iterate(padded_string_view(json)); + _json_iter->descend_to(depth()+1); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + return true; } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { - // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception - SIMDJSON_TRY( result.error() ); - padded_string_view json = result.value_unsafe(); - return iterate(json); +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should + // also call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != ']') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); + } + // If the last character is ] *and* the first gibberish character is also ']' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed array. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { - // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception - SIMDJSON_TRY( result.error() ); - const padded_string &json = result.value_unsafe(); - return iterate(json); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { + auto error = check_root_array(); + if (error) { return error; } + return started_array(); } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } - - json.remove_utf8_bom(); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { + assert_at_next(); - // Allocate if needed - if (capacity() < json.length()) { - SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + logger::log_event(*this, "has_next_element"); + switch (*_json_iter->return_current_and_advance()) { + case ']': + logger::log_end_value(*_json_iter, "array"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + _json_iter->descend_to(depth()+1); + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between array elements"); } +} - // Run stage 1. - SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); - return json_iterator(reinterpret_cast(json.data()), this); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { + auto not_true = atomparsing::str4ncmp(json, "true"); + auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); + bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 5 : 4]); + if (error) { return incorrect_type_error("Not a boolean"); } + return simdjson_result(!not_true); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { + bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); + // if we start with 'n', we must be a null + if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } + return is_null_string; } -inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { - if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } - if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { - buf += 3; - len -= 3; - } - if(allow_comma_separated && batch_size < len) { batch_size = len; } - return document_stream(*this, buf, len, batch_size, allow_comma_separated); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { + return get_raw_json_string().unescape(json_iter(), allow_replacement); } -inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_string(allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; } -inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { + return get_raw_json_string().unescape_wobbly(json_iter()); } -inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + advance_scalar("string"); + return raw_json_string(json+1); } - -simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { - return _capacity; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { + auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; } -simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept { - return _max_capacity; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { + auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; } -simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { - return _max_depth; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { + auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; } - -simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { - if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { - _max_capacity = max_capacity; - } else { - _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; - } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { + auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; } - -simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { - uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); - if (!end) { return STRING_ERROR; } - std::string_view result(reinterpret_cast(dst), end-dst); - dst = end; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { + auto result = numberparsing::parse_double(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } return result; } - -simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { - uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); - if (!end) { return STRING_ERROR; } - std::string_view result(reinterpret_cast(dst), end-dst); - dst = end; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { + auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } return result; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { + auto result = parse_bool(peek_non_root_scalar("bool")); + if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } + return result; +} +simdjson_inline simdjson_result value_iterator::is_null() noexcept { + bool is_null_value; + SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); + if(is_null_value) { advance_non_root_scalar("null"); } + return is_null_value; +} +simdjson_inline bool value_iterator::is_negative() noexcept { + return numberparsing::is_negative(peek_non_root_scalar("numbersign")); +} +simdjson_inline bool value_iterator::is_root_negative() noexcept { + return numberparsing::is_negative(peek_root_scalar("numbersign")); +} +simdjson_inline simdjson_result value_iterator::is_integer() noexcept { + return numberparsing::is_integer(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { + return numberparsing::get_number_type(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number() noexcept { + number num; + error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); + if(error) { return error; } + return num; +} -} // namespace ondemand -} // namespace westmere -} // namespace simdjson - -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::parser &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H -/* end file simdjson/generic/ondemand/parser-inl.h for westmere */ -/* including simdjson/generic/ondemand/raw_json_string-inl.h for westmere: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ -/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for westmere */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { - -namespace westmere { -namespace ondemand { - -simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} - -simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } - +simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("is_root_integer"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + return false; // if there are more than 20 characters, it cannot be represented as an integer. + } + auto answer = numberparsing::is_integer(tmpbuf); + // If the parsing was a success, we must still check that it is + // a single scalar. Note that we parse first because of cases like '[]' where + // getting TRAILING_CONTENT is wrong. + if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } + return answer; +} -simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { - size_t pos{0}; - while(pos < target.size()) { - pos = target.find('"', pos); - if(pos == std::string_view::npos) { return true; } - if(pos != 0 && target[pos-1] != '\\') { return false; } - if(pos > 1 && target[pos-2] == '\\') { - size_t backslash_count{2}; - for(size_t i = 3; i <= pos; i++) { - if(target[pos-i] == '\\') { backslash_count++; } - else { break; } - } - if(backslash_count % 2 == 0) { return false; } +simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return number_type::big_integer; } - pos++; + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; } - return true; + auto answer = numberparsing::get_number_type(tmpbuf); + if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + return answer; } - -simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { - size_t pos{0}; - while(target[pos]) { - const char * result = strchr(target+pos, '"'); - if(result == nullptr) { return true; } - pos = result - target; - if(pos != 0 && target[pos-1] != '\\') { return false; } - if(pos > 1 && target[pos-2] == '\\') { - size_t backslash_count{2}; - for(size_t i = 3; i <= pos; i++) { - if(target[pos-i] == '\\') { backslash_count++; } - else { break; } - } - if(backslash_count % 2 == 0) { return false; } +simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + // NOTE: the current approach doesn't work for very big integer numbers containing more than 1074 digits. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return BIGINT_ERROR; } - pos++; + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; } - return true; + number num; + error_code error = numberparsing::parse_number(tmpbuf, num); + if(error) { return error; } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("number"); + return num; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { + return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_root_string(check_trailing, allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { + return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } - -simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { - // If we are going to call memcmp, then we must know something about the length of the raw_json_string. - return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + auto result = numberparsing::parse_integer(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } -simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { - // Assumptions: does not contain unescaped quote characters("), and - // the raw content is quote terminated within a valid JSON string. - if(target.size() <= SIMDJSON_PADDING) { - return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + auto result = numberparsing::parse_integer_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; } - const char * r{raw()}; - size_t pos{0}; - for(;pos < target.size();pos++) { - if(r[pos] != target[pos]) { return false; } + auto result = numberparsing::parse_double(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); } - if(r[pos] != '"') { return false; } - return true; + return result; } -simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { - const char * r{raw()}; - size_t pos{0}; - bool escaping{false}; - for(;pos < target.size();pos++) { - if(r[pos] != target[pos]) { return false; } - // if target is a compile-time constant and it is free from - // quotes, then the next part could get optimized away through - // inlining. - if((target[pos] == '"') && !escaping) { - // We have reached the end of the raw_json_string but - // the target is not done. - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; - } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; } - if(r[pos] != '"') { return false; } - return true; + auto result = numberparsing::parse_double_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; } - -simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { - // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and - // the raw content is quote terminated within a valid JSON string. - const char * r{raw()}; - size_t pos{0}; - for(;target[pos];pos++) { - if(r[pos] != target[pos]) { return false; } - } - if(r[pos] != '"') { return false; } - return true; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("bool"); + // We have a boolean if we have either "true" or "false" and the next character is either + // a structural character or whitespace. We also check that the length is correct: + // "true" and "false" are 4 and 5 characters long, respectively. + bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && + (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + return value_true; } -simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { - // Assumptions: does not contain unescaped quote characters, and - // the raw content is quote terminated within a valid JSON string. - const char * r{raw()}; - size_t pos{0}; - bool escaping{false}; - for(;target[pos];pos++) { - if(r[pos] != target[pos]) { return false; } - // if target is a compile-time constant and it is free from - // quotes, then the next part could get optimized away through - // inlining. - if((target[pos] == '"') && !escaping) { - // We have reached the end of the raw_json_string but - // the target is not done. - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; - } +simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("null"); + bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + if(result) { // we have something that looks like a null. + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("null"); + } else if (json[0] == 'n') { + return incorrect_type_error("Not a null but starts with n"); } - if(r[pos] != '"') { return false; } - return true; + return result; } -simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { - return a.unsafe_is_equal(c); -} +simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); -simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { - return a == c; + return _json_iter->skip_child(depth()); } -simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { - return !(a == c); +simdjson_inline value_iterator value_iterator::child() const noexcept { + assert_at_child(); + return { _json_iter, depth()+1, _json_iter->token.position() }; } -simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { - return !(a == c); +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is +// marked non-inline. +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline bool value_iterator::is_open() const noexcept { + return _json_iter->depth() >= depth(); } +SIMDJSON_POP_DISABLE_WARNINGS - -simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { - return iter.unescape(*this, allow_replacement); +simdjson_inline bool value_iterator::at_end() const noexcept { + return _json_iter->at_end(); } -simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { - return iter.unescape_wobbly(*this); +simdjson_inline bool value_iterator::at_start() const noexcept { + return _json_iter->token.position() == start_position(); } -simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { - bool in_escape = false; - const char *s = str.raw(); - while (true) { - switch (*s) { - case '\\': in_escape = !in_escape; break; - case '"': if (in_escape) { in_escape = false; } else { return out; } break; - default: if (in_escape) { in_escape = false; } - } - out << *s; - s++; - } +simdjson_inline bool value_iterator::at_first_field() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + return _json_iter->token.position() == start_position() + 1; } -} // namespace ondemand -} // namespace westmere -} // namespace simdjson - -namespace simdjson { +simdjson_inline void value_iterator::abandon() noexcept { + _json_iter->abandon(); +} -simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::raw_json_string &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { + return _depth; +} +simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { + return _json_iter->error; +} +simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { + return _json_iter->string_buf_loc(); +} +simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { + return *_json_iter; +} +simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { + return *_json_iter; +} -simdjson_inline simdjson_result simdjson_result::raw() const noexcept { - if (error()) { return error(); } - return first.raw(); +simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { + return _json_iter->peek(start_position()); } -simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(westmere::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { - if (error()) { return error(); } - return first.unescape(iter, allow_replacement); +simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { + return _json_iter->peek_length(start_position()); } -simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(westmere::ondemand::json_iterator &iter) const noexcept { - if (error()) { return error(); } - return first.unescape_wobbly(iter); +simdjson_inline uint32_t value_iterator::peek_root_length() const noexcept { + return _json_iter->peek_root_length(start_position()); } -} // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H -/* end file simdjson/generic/ondemand/raw_json_string-inl.h for westmere */ -/* including simdjson/generic/ondemand/serialization-inl.h for westmere: #include "simdjson/generic/ondemand/serialization-inl.h" */ -/* begin file simdjson/generic/ondemand/serialization-inl.h for westmere */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return peek_start(); } -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + return _json_iter->peek(); +} -namespace simdjson { +simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return; } -inline std::string_view trim(const std::string_view str) noexcept { - // We can almost surely do better by rolling our own find_first_not_of function. - size_t first = str.find_first_not_of(" \t\n\r"); - // If we have the empty string (just white space), then no trimming is possible, and - // we return the empty string_view. - if (std::string_view::npos == first) { return std::string_view(); } - size_t last = str.find_last_not_of(" \t\n\r"); - return str.substr(first, (last - first + 1)); + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); } +simdjson_warn_unused simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { + logger::log_start_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + const uint8_t *json; + if (!is_at_start()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + json = peek_start(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + } else { + assert_at_start(); + /** + * We should be prudent. Let us peek. If it is not the right type, we + * return an error. Only once we have determined that we have the right + * type are we allowed to advance! + */ + json = _json_iter->peek(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + _json_iter->return_current_and_advance(); + } -inline simdjson_result to_json_string(westmere::ondemand::document& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); -} -inline simdjson_result to_json_string(westmere::ondemand::document_reference& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); + return SUCCESS; } -inline simdjson_result to_json_string(westmere::ondemand::value& x) noexcept { - /** - * If we somehow receive a value that has already been consumed, - * then the following code could be in trouble. E.g., we create - * an array as needed, but if an array was already created, then - * it could be bad. - */ - using namespace westmere::ondemand; - westmere::ondemand::json_type t; - auto error = x.type().get(t); - if(error != SUCCESS) { return error; } - switch (t) - { - case json_type::array: - { - westmere::ondemand::array array; - error = x.get_array().get(array); - if(error) { return error; } - return to_json_string(array); - } - case json_type::object: - { - westmere::ondemand::object object; - error = x.get_object().get(object); - if(error) { return error; } - return to_json_string(object); - } - default: - return trim(x.raw_json_token()); - } -} -inline simdjson_result to_json_string(westmere::ondemand::object& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); -} +simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } -inline simdjson_result to_json_string(westmere::ondemand::array& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); + assert_at_root(); + return _json_iter->peek(); } +simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); + assert_at_non_root_start(); + return _json_iter->peek(); } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); -} +simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); + assert_at_root(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); } +simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); + assert_at_non_root_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); +simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { + logger::log_error(*_json_iter, start_position(), depth(), message); + return INCORRECT_TYPE; } -} // namespace simdjson -namespace simdjson { namespace westmere { namespace ondemand { - -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::value x) { - std::string_view v; - auto error = simdjson::to_json_string(x).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::value x) { - std::string_view v; - auto error = simdjson::to_json_string(x).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } +simdjson_inline bool value_iterator::is_at_start() const noexcept { + return position() == start_position(); } -#endif -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::array value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::array value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } +simdjson_inline bool value_iterator::is_at_key() const noexcept { + // Keys are at the same depth as the object. + // Note here that we could be safer and check that we are within an object, + // but we do not. + return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; } -#endif -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::document& value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::document_reference& value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::document& value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } +simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { + // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). + auto delta = position() - start_position(); + return delta == 1 || delta == 2; } -#endif -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::object value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::object value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } +inline void value_iterator::assert_at_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); } -#endif -}}} // namespace simdjson::westmere::ondemand - -#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H -/* end file simdjson/generic/ondemand/serialization-inl.h for westmere */ -/* including simdjson/generic/ondemand/token_iterator-inl.h for westmere: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/token_iterator-inl.h for westmere */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { -namespace westmere { -namespace ondemand { -simdjson_inline token_iterator::token_iterator( - const uint8_t *_buf, - token_position position -) noexcept : buf{_buf}, _position{position} -{ +inline void value_iterator::assert_at_container_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); } -simdjson_inline uint32_t token_iterator::current_offset() const noexcept { - return *(_position); +inline void value_iterator::assert_at_next() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); } - -simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { - return &buf[*(_position++)]; +simdjson_inline void value_iterator::move_at_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position); } -simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { - return &buf[*position]; -} -simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { - return *position; +simdjson_inline void value_iterator::move_at_container_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position + 1); } -simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { - return *(position+1) - *position; + +simdjson_inline simdjson_result value_iterator::reset_array() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_array(); } -simdjson_inline uint32_t token_iterator::peek_root_length(token_position position) const noexcept { - return *(position+2) - *(position) > *(position+1) - *(position) ? - *(position+1) - *(position) - : *(position+2) - *(position); +simdjson_inline simdjson_result value_iterator::reset_object() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_object(); } -simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { - return &buf[*(_position+delta)]; + +inline void value_iterator::assert_at_child() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); + SIMDJSON_ASSUME( _depth > 0 ); } -simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { - return *(_position+delta); + +inline void value_iterator::assert_at_root() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth == 1 ); } -simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { - return *(_position+delta+1) - *(_position+delta); + +inline void value_iterator::assert_at_non_root_start() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth > 1 ); } -simdjson_inline token_position token_iterator::position() const noexcept { - return _position; +inline void value_iterator::assert_is_valid() const noexcept { + SIMDJSON_ASSUME( _json_iter != nullptr ); } -simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { - _position = target_position; + +simdjson_inline bool value_iterator::is_valid() const noexcept { + return _json_iter != nullptr; } -simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { - return _position == other._position; +simdjson_inline simdjson_result value_iterator::type() const noexcept { + switch (*peek_start()) { + case '{': + return json_type::object; + case '[': + return json_type::array; + case '"': + return json_type::string; + case 'n': + return json_type::null; + case 't': case 'f': + return json_type::boolean; + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return json_type::number; + default: + return json_type::unknown; + } } -simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { - return _position != other._position; + +simdjson_inline token_position value_iterator::start_position() const noexcept { + return _start_position; } -simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { - return _position > other._position; + +simdjson_inline token_position value_iterator::position() const noexcept { + return _json_iter->position(); } -simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { - return _position >= other._position; + +simdjson_inline token_position value_iterator::end_position() const noexcept { + return _json_iter->end_position(); } -simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { - return _position < other._position; + +simdjson_inline token_position value_iterator::last_position() const noexcept { + return _json_iter->last_position(); } -simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { - return _position <= other._position; + +simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { + return _json_iter->report_error(error, message); } } // namespace ondemand @@ -99321,1116 +121334,2050 @@ simdjson_inline bool token_iterator::operator<=(const token_iterator &other) con namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::token_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::value_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/token_iterator-inl.h for westmere */ -/* including simdjson/generic/ondemand/value_iterator-inl.h for westmere: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/value_iterator-inl.h for westmere */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/value_iterator-inl.h for westmere */ + +// JSON builder inline definitions +/* including simdjson/generic/ondemand/json_string_builder-inl.h for westmere: #include "simdjson/generic/ondemand/json_string_builder-inl.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder-inl.h for westmere */ +/** + * This file is part of the builder API. It is temporarily in the ondemand + * directory but we will move it to a builder directory later. + */ +#include +#include +#include +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +/* + * Empirically, we have found that an inlined optimization is important for + * performance. The following macros are not ideal. We should find a better + * way to inline the code. + */ + +#if defined(__SSE2__) || defined(__x86_64__) || defined(__x86_64) || \ + (defined(_M_AMD64) || defined(_M_X64) || \ + (defined(_M_IX86_FP) && _M_IX86_FP == 2)) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#define SIMDJSON_EXPERIMENTAL_HAS_SSE2 1 +#endif +#endif + +#if defined(__aarch64__) || defined(_M_ARM64) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_NEON +#define SIMDJSON_EXPERIMENTAL_HAS_NEON 1 +#endif +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +#include +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#include +#endif + namespace simdjson { namespace westmere { -namespace ondemand { +namespace builder { + +static SIMDJSON_CONSTEXPR_LAMBDA std::array + json_quotable_character = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; -simdjson_inline value_iterator::value_iterator( - json_iterator *json_iter, - depth_t depth, - token_position start_position -) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} -{ -} +/** -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { - SIMDJSON_TRY( start_container('{', "Not an object", "object") ); - return started_object(); -} +A possible SWAR implementation of has_json_escapable_byte. It is not used +because it is slower than the current implementation. It is kept here for +reference (to show that we tried it). -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { - SIMDJSON_TRY( start_container('{', "Not an object", "object") ); - return started_root_object(); +inline bool has_json_escapable_byte(uint64_t x) { + uint64_t is_ascii = 0x8080808080808080ULL & ~x; + uint64_t xor2 = x ^ 0x0202020202020202ULL; + uint64_t lt32_or_eq34 = xor2 - 0x2121212121212121ULL; + uint64_t sub92 = x ^ 0x5C5C5C5C5C5C5C5CULL; + uint64_t eq92 = (sub92 - 0x0101010101010101ULL); + return ((lt32_or_eq34 | eq92) & is_ascii) != 0; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { - assert_at_container_start(); -#if SIMDJSON_DEVELOPMENT_CHECKS - _json_iter->set_start_position(_depth, start_position()); -#endif - if (*_json_iter->peek() == '}') { - logger::log_value(*_json_iter, "empty object"); - _json_iter->return_current_and_advance(); - end_container(); - return false; - } - return true; -} +**/ -simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { - // When in streaming mode, we cannot expect peek_last() to be the last structural element of the - // current document. It only works in the normal mode where we have indexed a single document. - // Note that adding a check for 'streaming' is not expensive since we only have at most - // one root element. - if ( ! _json_iter->streaming() ) { - // The following lines do not fully protect against garbage content within the - // object: e.g., `{"a":2} foo }`. Users concerned with garbage content should - // call `at_end()` on the document instance at the end of the processing to - // ensure that the processing has finished at the end. - // - if (*_json_iter->peek_last() != '}') { - _json_iter->abandon(); - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); - } - // If the last character is } *and* the first gibberish character is also '}' - // then on-demand could accidentally go over. So we need additional checks. - // https://github.com/simdjson/simdjson/issues/1834 - // Checking that the document is balanced requires a full scan which is potentially - // expensive, but it only happens in edge cases where the first padding character is - // a closing bracket. - if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { - _json_iter->abandon(); - // The exact error would require more work. It will typically be an unclosed object. - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); +SIMDJSON_CONSTEXPR_LAMBDA simdjson_inline bool +simple_needs_escaping(std::string_view v) { + for (char c : v) { + // a table lookup is faster than a series of comparisons + if (json_quotable_character[static_cast(c)]) { + return true; } } - return SUCCESS; + return false; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { - auto error = check_root_object(); - if(error) { return error; } - return started_object(); +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); + } + size_t i = 0; + uint8x16_t running = vdupq_n_u8(0); + uint8x16_t v34 = vdupq_n_u8(34); + uint8x16_t v92 = vdupq_n_u8(92); + + for (; i + 15 < view.size(); i += 16) { + uint8x16_t word = vld1q_u8((const uint8_t *)view.data() + i); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + if (i < view.size()) { + uint8x16_t word = + vld1q_u8((const uint8_t *)view.data() + view.length() - 16); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + return vmaxvq_u32(vreinterpretq_u32_u8(running)) != 0; +} +#elif SIMDJSON_EXPERIMENTAL_HAS_SSE2 +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); + } + size_t i = 0; + __m128i running = _mm_setzero_si128(); + for (; i + 15 < view.size(); i += 16) { + + __m128i word = + _mm_loadu_si128(reinterpret_cast(view.data() + i)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + if (i < view.size()) { + __m128i word = _mm_loadu_si128( + reinterpret_cast(view.data() + view.length() - 16)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + return _mm_movemask_epi8(running) != 0; +} +#else +simdjson_inline bool fast_needs_escaping(std::string_view view) { + return simple_needs_escaping(view); } +#endif -simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { -#if SIMDJSON_CHECK_EOF - if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } - // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } -#endif // SIMDJSON_CHECK_EOF - _json_iter->ascend_to(depth()-1); - return SUCCESS; +SIMDJSON_CONSTEXPR_LAMBDA inline size_t +find_next_json_quotable_character(const std::string_view view, + size_t location) noexcept { + + for (auto pos = view.begin() + location; pos != view.end(); ++pos) { + if (json_quotable_character[static_cast(*pos)]) { + return pos - view.begin(); + } + } + return size_t(view.size()); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { - assert_at_next(); +SIMDJSON_CONSTEXPR_LAMBDA static std::string_view control_chars[] = { + "\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", "\\u0006", + "\\u0007", "\\b", "\\t", "\\n", "\\u000b", "\\f", "\\r", + "\\u000e", "\\u000f", "\\u0010", "\\u0011", "\\u0012", "\\u0013", "\\u0014", + "\\u0015", "\\u0016", "\\u0017", "\\u0018", "\\u0019", "\\u001a", "\\u001b", + "\\u001c", "\\u001d", "\\u001e", "\\u001f"}; - // It's illegal to call this unless there are more tokens: anything that ends in } or ] is - // obligated to verify there are more tokens if they are not the top level. - switch (*_json_iter->return_current_and_advance()) { - case '}': - logger::log_end_value(*_json_iter, "object"); - SIMDJSON_TRY( end_container() ); - return false; - case ',': - return true; - default: - return report_error(TAPE_ERROR, "Missing comma between object fields"); +// All Unicode characters may be placed within the quotation marks, except for +// the characters that MUST be escaped: quotation mark, reverse solidus, and the +// control characters (U+0000 through U+001F). There are two-character sequence +// escape representations of some popular characters: +// \", \\, \b, \f, \n, \r, \t. +SIMDJSON_CONSTEXPR_LAMBDA void escape_json_char(char c, char *&out) { + if (c == '"') { + memcpy(out, "\\\"", 2); + out += 2; + } else if (c == '\\') { + memcpy(out, "\\\\", 2); + out += 2; + } else { + std::string_view v = control_chars[uint8_t(c)]; + memcpy(out, v.data(), v.size()); + out += v.size(); + } +} + +inline size_t write_string_escaped(const std::string_view input, char *out) { + size_t mysize = input.size(); + if (!fast_needs_escaping(input)) { // fast path! + memcpy(out, input.data(), input.size()); + return input.size(); + } + const char *const initout = out; + size_t location = find_next_json_quotable_character(input, 0); + memcpy(out, input.data(), location); + out += location; + escape_json_char(input[location], out); + location += 1; + while (location < mysize) { + size_t newlocation = find_next_json_quotable_character(input, location); + memcpy(out, input.data() + location, newlocation - location); + out += newlocation - location; + location = newlocation; + if (location == mysize) { + break; + } + escape_json_char(input[location], out); + location += 1; } + return out - initout; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { - error_code error; - bool has_value; - // - // Initially, the object can be in one of a few different places: - // - // 1. The start of the object, at the first field: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2, index 1) - // ``` - if (at_first_field()) { - has_value = true; +simdjson_inline string_builder::string_builder(size_t initial_capacity) + : buffer(new(std::nothrow) char[initial_capacity]), position(0), + capacity(buffer.get() != nullptr ? initial_capacity : 0), + is_valid(buffer.get() != nullptr) {} - // - // 2. When a previous search did not yield a value or the object is empty: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 0) - // { } - // ^ (depth 0, index 2) - // ``` - // - } else if (!is_open()) { -#if SIMDJSON_DEVELOPMENT_CHECKS - // If we're past the end of the object, we're being iterated out of order. - // Note: this is not perfect detection. It's possible the user is inside some other object; if so, - // this object iterator will blithely scan that object for fields. - if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } -#endif +simdjson_inline bool string_builder::capacity_check(size_t upcoming_bytes) { + // We use the convention that when is_valid is false, then the capacity and + // the position are 0. + // Most of the time, this function will return true. + if (simdjson_likely(upcoming_bytes <= capacity - position)) { + return true; + } + // check for overflow, most of the time there is no overflow + if (simdjson_likely(position + upcoming_bytes < position)) { return false; + } + // We will rarely get here. + grow_buffer((std::max)(capacity * 2, position + upcoming_bytes)); + // If the buffer allocation failed, we set is_valid to false. + return is_valid; +} - // 3. When a previous search found a field or an iterator yielded a value: - // - // ``` - // // When a field was not fully consumed (or not even touched at all) - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2) - // // When a field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // // When the last field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // ``` - // +simdjson_inline void string_builder::grow_buffer(size_t desired_capacity) { + if (!is_valid) { + return; + } + std::unique_ptr new_buffer(new (std::nothrow) char[desired_capacity]); + if (new_buffer.get() == nullptr) { + set_valid(false); + return; + } + std::memcpy(new_buffer.get(), buffer.get(), position); + buffer.swap(new_buffer); + capacity = desired_capacity; +} + +simdjson_inline void string_builder::set_valid(bool valid) noexcept { + if (!valid) { + is_valid = false; + capacity = 0; + position = 0; + buffer.reset(); } else { - if ((error = skip_child() )) { abandon(); return error; } - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } -#if SIMDJSON_DEVELOPMENT_CHECKS - if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } -#endif + is_valid = true; } - while (has_value) { - // Get the key and colon, stopping at the value. - raw_json_string actual_key; - // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes - // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. - // field_key() advances the pointer and checks that '"' is found (corresponding to a key). - // The depth is left unchanged by field_key(). - if ((error = field_key().get(actual_key) )) { abandon(); return error; }; - // field_value() will advance and check that we find a ':' separating the - // key and the value. It will also increment the depth by one. - if ((error = field_value() )) { abandon(); return error; } - // If it matches, stop and return - // We could do it this way if we wanted to allow arbitrary - // key content (including escaped quotes). - //if (actual_key.unsafe_is_equal(max_key_length, key)) { - // Instead we do the following which may trigger buffer overruns if the - // user provides an adversarial key (containing a well placed unescaped quote - // character and being longer than the number of bytes remaining in the JSON - // input). - if (actual_key.unsafe_is_equal(key)) { - logger::log_event(*this, "match", key, -2); - // If we return here, then we return while pointing at the ':' that we just checked. - return true; - } +} - // No match: skip the value and see if , or } is next - logger::log_event(*this, "no match", key, -2); - // The call to skip_child is meant to skip over the value corresponding to the key. - // After skip_child(), we are right before the next comma (',') or the final brace ('}'). - SIMDJSON_TRY( skip_child() ); // Skip the value entirely - // The has_next_field() advances the pointer and check that either ',' or '}' is found. - // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, - // then we are in error and we abort. - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +simdjson_inline size_t string_builder::size() const noexcept { + return position; +} + +simdjson_inline void string_builder::append(char c) noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = c; } +} - // If the loop ended, we're out of fields to look at. - return false; +simdjson_inline void string_builder::append_null() noexcept { + constexpr char null_literal[] = "null"; + constexpr size_t null_len = sizeof(null_literal) - 1; + if (capacity_check(null_len)) { + std::memcpy(buffer.get() + position, null_literal, null_len); + position += null_len; + } } -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { - /** - * When find_field_unordered_raw is called, we can either be pointing at the - * first key, pointing outside (at the closing brace) or if a key was matched - * we can be either pointing right afterthe ':' right before the value (that we need skip), - * or we may have consumed the value and we might be at a comma or at the - * final brace (ready for a call to has_next_field()). - */ - error_code error; - bool has_value; +simdjson_inline void string_builder::clear() noexcept { + position = 0; + // if it was invalid, we should try to repair it + if (!is_valid) { + capacity = 0; + buffer.reset(); + is_valid = true; + } +} - // First, we scan from that point to the end. - // If we don't find a match, we may loop back around, and scan from the beginning to that point. - token_position search_start = _json_iter->position(); +namespace internal { - // We want to know whether we need to go back to the beginning. - bool at_first = at_first_field(); - /////////////// - // Initially, the object can be in one of a few different places: - // - // 1. At the first key: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2, index 1) - // ``` - // - if (at_first) { - has_value = true; +template ::value>::type> +simdjson_really_inline int int_log2(number_type x) { + return 63 - leading_zeroes(uint64_t(x) | 1); +} + +simdjson_really_inline int fast_digit_count_32(uint32_t x) { + static uint64_t table[] = { + 4294967296, 8589934582, 8589934582, 8589934582, 12884901788, + 12884901788, 12884901788, 17179868184, 17179868184, 17179868184, + 21474826480, 21474826480, 21474826480, 21474826480, 25769703776, + 25769703776, 25769703776, 30063771072, 30063771072, 30063771072, + 34349738368, 34349738368, 34349738368, 34349738368, 38554705664, + 38554705664, 38554705664, 41949672960, 41949672960, 41949672960, + 42949672960, 42949672960}; + return uint32_t((x + table[int_log2(x)]) >> 32); +} + +simdjson_really_inline int fast_digit_count_64(uint64_t x) { + static uint64_t table[] = {9, + 99, + 999, + 9999, + 99999, + 999999, + 9999999, + 99999999, + 999999999, + 9999999999, + 99999999999, + 999999999999, + 9999999999999, + 99999999999999, + 999999999999999ULL, + 9999999999999999ULL, + 99999999999999999ULL, + 999999999999999999ULL, + 9999999999999999999ULL}; + int y = (19 * int_log2(x) >> 6); + y += x > table[y]; + return y + 1; +} + +template ::value>::type> +simdjson_really_inline size_t digit_count(number_type v) noexcept { + static_assert(sizeof(number_type) == 8 || sizeof(number_type) == 4 || + sizeof(number_type) == 2 || sizeof(number_type) == 1, + "We only support 8-bit, 16-bit, 32-bit and 64-bit numbers"); + SIMDJSON_IF_CONSTEXPR(sizeof(number_type) <= 4) { + return fast_digit_count_32(static_cast(v)); + } + else { + return fast_digit_count_64(static_cast(v)); + } +} +static const char decimal_table[200] = { + 0x30, 0x30, 0x30, 0x31, 0x30, 0x32, 0x30, 0x33, 0x30, 0x34, 0x30, 0x35, + 0x30, 0x36, 0x30, 0x37, 0x30, 0x38, 0x30, 0x39, 0x31, 0x30, 0x31, 0x31, + 0x31, 0x32, 0x31, 0x33, 0x31, 0x34, 0x31, 0x35, 0x31, 0x36, 0x31, 0x37, + 0x31, 0x38, 0x31, 0x39, 0x32, 0x30, 0x32, 0x31, 0x32, 0x32, 0x32, 0x33, + 0x32, 0x34, 0x32, 0x35, 0x32, 0x36, 0x32, 0x37, 0x32, 0x38, 0x32, 0x39, + 0x33, 0x30, 0x33, 0x31, 0x33, 0x32, 0x33, 0x33, 0x33, 0x34, 0x33, 0x35, + 0x33, 0x36, 0x33, 0x37, 0x33, 0x38, 0x33, 0x39, 0x34, 0x30, 0x34, 0x31, + 0x34, 0x32, 0x34, 0x33, 0x34, 0x34, 0x34, 0x35, 0x34, 0x36, 0x34, 0x37, + 0x34, 0x38, 0x34, 0x39, 0x35, 0x30, 0x35, 0x31, 0x35, 0x32, 0x35, 0x33, + 0x35, 0x34, 0x35, 0x35, 0x35, 0x36, 0x35, 0x37, 0x35, 0x38, 0x35, 0x39, + 0x36, 0x30, 0x36, 0x31, 0x36, 0x32, 0x36, 0x33, 0x36, 0x34, 0x36, 0x35, + 0x36, 0x36, 0x36, 0x37, 0x36, 0x38, 0x36, 0x39, 0x37, 0x30, 0x37, 0x31, + 0x37, 0x32, 0x37, 0x33, 0x37, 0x34, 0x37, 0x35, 0x37, 0x36, 0x37, 0x37, + 0x37, 0x38, 0x37, 0x39, 0x38, 0x30, 0x38, 0x31, 0x38, 0x32, 0x38, 0x33, + 0x38, 0x34, 0x38, 0x35, 0x38, 0x36, 0x38, 0x37, 0x38, 0x38, 0x38, 0x39, + 0x39, 0x30, 0x39, 0x31, 0x39, 0x32, 0x39, 0x33, 0x39, 0x34, 0x39, 0x35, + 0x39, 0x36, 0x39, 0x37, 0x39, 0x38, 0x39, 0x39, +}; +} // namespace internal - // 2. When a previous search did not yield a value or the object is empty: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 0) - // { } - // ^ (depth 0, index 2) - // ``` - // - } else if (!is_open()) { +template +simdjson_inline void string_builder::append(number_type v) noexcept { + static_assert(std::is_same::value || + std::is_integral::value || + std::is_floating_point::value, + "Unsupported number type"); + // If C++17 is available, we can 'if constexpr' here. + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + if (v) { + constexpr char true_literal[] = "true"; + constexpr size_t true_len = sizeof(true_literal) - 1; + if (capacity_check(true_len)) { + std::memcpy(buffer.get() + position, true_literal, true_len); + position += true_len; + } + } else { + constexpr char false_literal[] = "false"; + constexpr size_t false_len = sizeof(false_literal) - 1; + if (capacity_check(false_len)) { + std::memcpy(buffer.get() + position, false_literal, false_len); + position += false_len; + } + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_unsigned::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + unsigned_type pv = static_cast(v); + size_t dc = internal::digit_count(pv); + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100) * 2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_integral::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + bool negative = v < 0; + unsigned_type pv = static_cast(v); + if (negative) { + pv = 0 - pv; // the 0 is for Microsoft + } + size_t dc = internal::digit_count(pv); + // by always writing the minus sign, we avoid the branch. + buffer.get()[position] = '-'; + position += negative ? 1 : 0; + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100) * 2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_floating_point::value) { + constexpr size_t max_number_size = 24; + if (capacity_check(max_number_size)) { + // We could specialize for float. + char *end = simdjson::internal::to_chars(buffer.get() + position, nullptr, + double(v)); + position = end - buffer.get(); + } + } +} -#if SIMDJSON_DEVELOPMENT_CHECKS - // If we're past the end of the object, we're being iterated out of order. - // Note: this is not perfect detection. It's possible the user is inside some other object; if so, - // this object iterator will blithely scan that object for fields. - if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } -#endif - SIMDJSON_TRY(reset_object().get(has_value)); - at_first = true; - // 3. When a previous search found a field or an iterator yielded a value: - // - // ``` - // // When a field was not fully consumed (or not even touched at all) - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2) - // // When a field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // // When the last field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // ``` - // - } else { - // If someone queried a key but they not did access the value, then we are left pointing - // at the ':' and we need to move forward through the value... If the value was - // processed then skip_child() does not move the iterator (but may adjust the depth). - if ((error = skip_child() )) { abandon(); return error; } - search_start = _json_iter->position(); - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } -#if SIMDJSON_DEVELOPMENT_CHECKS - if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } -#endif +simdjson_inline void +string_builder::escape_and_append(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(6 * input.size())) { + position += write_string_escaped(input, buffer.get() + position); } +} - // After initial processing, we will be in one of two states: - // - // ``` - // // At the beginning of a field - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // // At the end of the object - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 0) - // ``` - // - // Next, we find a match starting from the current position. - while (has_value) { - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field +simdjson_inline void +string_builder::escape_and_append_with_quotes(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * input.size())) { + buffer.get()[position++] = '"'; + position += write_string_escaped(input, buffer.get() + position); + buffer.get()[position++] = '"'; + } +} - // Get the key and colon, stopping at the value. - raw_json_string actual_key; - // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes - // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. - // field_key() advances the pointer and checks that '"' is found (corresponding to a key). - // The depth is left unchanged by field_key(). - if ((error = field_key().get(actual_key) )) { abandon(); return error; }; - // field_value() will advance and check that we find a ':' separating the - // key and the value. It will also increment the depth by one. - if ((error = field_value() )) { abandon(); return error; } +simdjson_inline void +string_builder::escape_and_append_with_quotes(char input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * 1)) { + buffer.get()[position++] = '"'; + std::string_view cinput(&input, 1); + position += write_string_escaped(cinput, buffer.get() + position); + buffer.get()[position++] = '"'; + } +} - // If it matches, stop and return - // We could do it this way if we wanted to allow arbitrary - // key content (including escaped quotes). - // if (actual_key.unsafe_is_equal(max_key_length, key)) { - // Instead we do the following which may trigger buffer overruns if the - // user provides an adversarial key (containing a well placed unescaped quote - // character and being longer than the number of bytes remaining in the JSON - // input). - if (actual_key.unsafe_is_equal(key)) { - logger::log_event(*this, "match", key, -2); - // If we return here, then we return while pointing at the ':' that we just checked. - return true; - } +simdjson_inline void +string_builder::escape_and_append_with_quotes(const char *input) noexcept { + std::string_view cinput(input); + escape_and_append_with_quotes(cinput); +} +#if SIMDJSON_SUPPORTS_CONCEPTS +template +simdjson_inline void string_builder::escape_and_append_with_quotes() noexcept { + escape_and_append_with_quotes(constevalutil::string_constant::value); +} +#endif - // No match: skip the value and see if , or } is next - logger::log_event(*this, "no match", key, -2); - // The call to skip_child is meant to skip over the value corresponding to the key. - // After skip_child(), we are right before the next comma (',') or the final brace ('}'). - SIMDJSON_TRY( skip_child() ); - // The has_next_field() advances the pointer and check that either ',' or '}' is found. - // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, - // then we are in error and we abort. - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +simdjson_inline void string_builder::append_raw(const char *c) noexcept { + size_t len = std::strlen(c); + append_raw(c, len); +} + +simdjson_inline void +string_builder::append_raw(std::string_view input) noexcept { + if (capacity_check(input.size())) { + std::memcpy(buffer.get() + position, input.data(), input.size()); + position += input.size(); } - // Performance note: it maybe wasteful to rewind to the beginning when there might be - // no other query following. Indeed, it would require reskipping the whole object. - // Instead, you can just stay where you are. If there is a new query, there is always time - // to rewind. - if(at_first) { return false; } +} - // If we reach the end without finding a match, search the rest of the fields starting at the - // beginning of the object. - // (We have already run through the object before, so we've already validated its structure. We - // don't check errors in this bit.) - SIMDJSON_TRY(reset_object().get(has_value)); - while (true) { - SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field +simdjson_inline void string_builder::append_raw(const char *str, + size_t len) noexcept { + if (capacity_check(len)) { + std::memcpy(buffer.get() + position, str, len); + position += len; + } +} +#if SIMDJSON_SUPPORTS_CONCEPTS +// Support for optional types (std::optional, etc.) +template + requires(!require_custom_serialization) +simdjson_inline void string_builder::append(const T &opt) { + if (opt) { + append(*opt); + } else { + append_null(); + } +} + +template + requires(require_custom_serialization) +simdjson_inline void string_builder::append(T &&val) { + serialize(*this, std::forward(val)); +} - // Get the key and colon, stopping at the value. - raw_json_string actual_key; - // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes - // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. - // field_key() advances the pointer and checks that '"' is found (corresponding to a key). - // The depth is left unchanged by field_key(). - error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); - // field_value() will advance and check that we find a ':' separating the - // key and the value. It will also increment the depth by one. - error = field_value(); SIMDJSON_ASSUME(!error); +template + requires(std::is_convertible::value || + std::is_same::value) +simdjson_inline void string_builder::append(const T &value) { + escape_and_append_with_quotes(value); +} +#endif - // If it matches, stop and return - // We could do it this way if we wanted to allow arbitrary - // key content (including escaped quotes). - // if (actual_key.unsafe_is_equal(max_key_length, key)) { - // Instead we do the following which may trigger buffer overruns if the - // user provides an adversarial key (containing a well placed unescaped quote - // character and being longer than the number of bytes remaining in the JSON - // input). - if (actual_key.unsafe_is_equal(key)) { - logger::log_event(*this, "match", key, -2); - // If we return here, then we return while pointing at the ':' that we just checked. - return true; +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS +// Support for range-based appending (std::ranges::view, etc.) +template + requires(!std::is_convertible::value && !require_custom_serialization) +simdjson_inline void string_builder::append(const R &range) noexcept { + auto it = std::ranges::begin(range); + auto end = std::ranges::end(range); + if constexpr (concepts::is_pair>) { + start_object(); + + if (it == end) { + end_object(); + return; // Handle empty range } + // Append first item without leading comma + append_key_value(it->first, it->second); + ++it; - // No match: skip the value and see if , or } is next - logger::log_event(*this, "no match", key, -2); - // The call to skip_child is meant to skip over the value corresponding to the key. - // After skip_child(), we are right before the next comma (',') or the final brace ('}'). - SIMDJSON_TRY( skip_child() ); - // If we reached the end of the key-value pair we started from, then we know - // that the key is not there so we return false. We are either right before - // the next comma or the final brace. - if(_json_iter->position() == search_start) { return false; } - // The has_next_field() advances the pointer and check that either ',' or '}' is found. - // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, - // then we are in error and we abort. - error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); - // If we make the mistake of exiting here, then we could be left pointing at a key - // in the middle of an object. That's not an allowable state. + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append_key_value(it->first, it->second); + } + end_object(); + } else { + start_array(); + if (it == end) { + end_array(); + return; // Handle empty range + } + + // Append first item without leading comma + append(*it); + ++it; + + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append(*it); + } + end_array(); } - // If the loop ended, we're out of fields to look at. The program should - // never reach this point. - return false; } -SIMDJSON_POP_DISABLE_WARNINGS -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { - assert_at_next(); +#endif - const uint8_t *key = _json_iter->return_current_and_advance(); - if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } - return raw_json_string(key); +#if SIMDJSON_EXCEPTIONS +simdjson_inline string_builder::operator std::string() const noexcept(false) { + return std::string(operator std::string_view()); } -simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { - assert_at_next(); +simdjson_inline string_builder::operator std::string_view() const + noexcept(false) simdjson_lifetime_bound { + return view(); +} +#endif - if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } - _json_iter->descend_to(depth()+1); - return SUCCESS; +simdjson_inline simdjson_result +string_builder::view() const noexcept { + if (!is_valid) { + return simdjson::OUT_OF_CAPACITY; + } + return std::string_view(buffer.get(), position); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { - SIMDJSON_TRY( start_container('[', "Not an array", "array") ); - return started_array(); +simdjson_inline simdjson_result string_builder::c_str() noexcept { + if (capacity_check(1)) { + buffer.get()[position] = '\0'; + return buffer.get(); + } + return simdjson::OUT_OF_CAPACITY; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { - SIMDJSON_TRY( start_container('[', "Not an array", "array") ); - return started_root_array(); +simdjson_inline bool string_builder::validate_unicode() const noexcept { + return simdjson::validate_utf8(buffer.get(), position); } -inline std::string value_iterator::to_string() const noexcept { - auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); - if(_json_iter != nullptr) { answer += _json_iter->to_string(); } - answer += std::string(" ]"); - return answer; +simdjson_inline void string_builder::start_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '{'; + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { - assert_at_container_start(); - if (*_json_iter->peek() == ']') { - logger::log_value(*_json_iter, "empty array"); - _json_iter->return_current_and_advance(); - SIMDJSON_TRY( end_container() ); - return false; +simdjson_inline void string_builder::end_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '}'; } - _json_iter->descend_to(depth()+1); -#if SIMDJSON_DEVELOPMENT_CHECKS - _json_iter->set_start_position(_depth, start_position()); -#endif - return true; } -simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { - // When in streaming mode, we cannot expect peek_last() to be the last structural element of the - // current document. It only works in the normal mode where we have indexed a single document. - // Note that adding a check for 'streaming' is not expensive since we only have at most - // one root element. - if ( ! _json_iter->streaming() ) { - // The following lines do not fully protect against garbage content within the - // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should - // also call `at_end()` on the document instance at the end of the processing to - // ensure that the processing has finished at the end. - // - if (*_json_iter->peek_last() != ']') { - _json_iter->abandon(); - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); - } - // If the last character is ] *and* the first gibberish character is also ']' - // then on-demand could accidentally go over. So we need additional checks. - // https://github.com/simdjson/simdjson/issues/1834 - // Checking that the document is balanced requires a full scan which is potentially - // expensive, but it only happens in edge cases where the first padding character is - // a closing bracket. - if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { - _json_iter->abandon(); - // The exact error would require more work. It will typically be an unclosed array. - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); - } +simdjson_inline void string_builder::start_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '['; } - return SUCCESS; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { - auto error = check_root_array(); - if (error) { return error; } - return started_array(); +simdjson_inline void string_builder::end_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ']'; + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { - assert_at_next(); +simdjson_inline void string_builder::append_comma() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ','; + } +} - logger::log_event(*this, "has_next_element"); - switch (*_json_iter->return_current_and_advance()) { - case ']': - logger::log_end_value(*_json_iter, "array"); - SIMDJSON_TRY( end_container() ); - return false; - case ',': - _json_iter->descend_to(depth()+1); - return true; - default: - return report_error(TAPE_ERROR, "Missing comma between array elements"); +simdjson_inline void string_builder::append_colon() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ':'; } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { - auto not_true = atomparsing::str4ncmp(json, "true"); - auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); - bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 5 : 4]); - if (error) { return incorrect_type_error("Not a boolean"); } - return simdjson_result(!not_true); +template +simdjson_inline void +string_builder::append_key_value(key_type key, value_type value) noexcept { + static_assert(std::is_same::value || + std::is_convertible::value, + "Unsupported key type"); + escape_and_append_with_quotes(key); + append_colon(); + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + append_null(); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR( + std::is_convertible::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else { + append(value); + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { - bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); - // if we start with 'n', we must be a null - if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } - return is_null_string; + +#if SIMDJSON_SUPPORTS_CONCEPTS +template +simdjson_inline void +string_builder::append_key_value(value_type value) noexcept { + escape_and_append_with_quotes(); + append_colon(); + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + append_null(); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR( + std::is_convertible::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else { + append(value); + } } +#endif -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { - return get_raw_json_string().unescape(json_iter(), allow_replacement); +} // namespace builder +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_STRING_BUILDER_INL_H +/* end file simdjson/generic/ondemand/json_string_builder-inl.h for westmere */ +/* including simdjson/generic/ondemand/json_builder.h for westmere: #include "simdjson/generic/ondemand/json_builder.h" */ +/* begin file simdjson/generic/ondemand/json_builder.h for westmere */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/concepts.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#if SIMDJSON_STATIC_REFLECTION + +#include +#include +#include +#include +#include +#include +#include +#include +// #include // for std::define_static_string - header not available yet + +namespace simdjson { +namespace westmere { +namespace builder { + +template + requires(concepts::container_but_not_string && !require_custom_serialization) +constexpr void atom(string_builder &b, const T &t) { + auto it = t.begin(); + auto end = t.end(); + if (it == end) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, *it); + ++it; + for (; it != end; ++it) { + b.append(','); + atom(b, *it); + } + b.append(']'); } -template -simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept { - std::string_view content; - auto err = get_string(allow_replacement).get(content); - if (err) { return err; } - receiver = content; - return SUCCESS; + +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +constexpr void atom(string_builder &b, const T &t) { + b.escape_and_append_with_quotes(t); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { - return get_raw_json_string().unescape_wobbly(json_iter()); + +template + requires(!require_custom_serialization) +constexpr void atom(string_builder &b, const T &m) { + if (m.empty()) { + b.append_raw("{}"); + return; + } + b.append('{'); + bool first = true; + for (const auto& [key, value] : m) { + if (!first) { + b.append(','); + } + first = false; + // Keys must be convertible to string_view per the concept + b.escape_and_append_with_quotes(key); + b.append(':'); + atom(b, value); + } + b.append('}'); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { - auto json = peek_scalar("string"); - if (*json != '"') { return incorrect_type_error("Not a string"); } - advance_scalar("string"); - return raw_json_string(json+1); + + +template::value && !std::is_same_v>::type> +constexpr void atom(string_builder &b, const number_type t) { + b.append(t); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { - auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } - return result; + +template + requires(std::is_class_v && !concepts::container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && !require_custom_serialization) +constexpr void atom(string_builder &b, const T &t) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, t.[:dm:]); + i++; + }; + b.append('}'); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { - auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } - return result; + +// Support for optional types (std::optional, etc.) +template + requires(!require_custom_serialization) +constexpr void atom(string_builder &b, const T &opt) { + if (opt) { + atom(b, opt.value()); + } else { + b.append_raw("null"); + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { - auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } - return result; + +// Support for smart pointers (std::unique_ptr, std::shared_ptr, etc.) +template + requires(!require_custom_serialization) +constexpr void atom(string_builder &b, const T &ptr) { + if (ptr) { + atom(b, *ptr); + } else { + b.append_raw("null"); + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { - auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } - return result; + +// Support for enums - serialize as string representation using expand approach from P2996R12 +template + requires(std::is_enum_v && !require_custom_serialization) +void atom(string_builder &b, const T &e) { +#if SIMDJSON_STATIC_REFLECTION + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + constexpr auto enum_str = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(enum_val))); + if (e == [:enum_val:]) { + b.append_raw(enum_str); + return; + } + }; + // Fallback to integer if enum value not found + atom(b, static_cast>(e)); +#else + // Fallback: serialize as integer if reflection not available + atom(b, static_cast>(e)); +#endif } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { - auto result = numberparsing::parse_double(peek_non_root_scalar("double")); - if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } - return result; + +// Support for appendable containers that don't have operator[] (sets, etc.) +template + requires(!concepts::container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v && !require_custom_serialization) +constexpr void atom(string_builder &b, const T &container) { + if (container.empty()) { + b.append_raw("[]"); + return; + } + b.append('['); + bool first = true; + for (const auto& item : container) { + if (!first) { + b.append(','); + } + first = false; + atom(b, item); + } + b.append(']'); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { - auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); - if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } - return result; + +// append functions that delegate to atom functions for primitive types +template + requires(std::is_arithmetic_v && !std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { - auto result = parse_bool(peek_non_root_scalar("bool")); - if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } - return result; + +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_inline simdjson_result value_iterator::is_null() noexcept { - bool is_null_value; - SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); - if(is_null_value) { advance_non_root_scalar("null"); } - return is_null_value; + +template + requires(!require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_inline bool value_iterator::is_negative() noexcept { - return numberparsing::is_negative(peek_non_root_scalar("numbersign")); + +template + requires(!require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_inline bool value_iterator::is_root_negative() noexcept { - return numberparsing::is_negative(peek_root_scalar("numbersign")); + +template + requires(!concepts::container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v && !require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_inline simdjson_result value_iterator::is_integer() noexcept { - return numberparsing::is_integer(peek_non_root_scalar("integer")); + +template + requires(!require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +// works for struct +template + requires(std::is_class_v && !concepts::container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && !require_custom_serialization) +void append(string_builder &b, const Z &z) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^Z, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, z.[:dm:]); + i++; + }; + b.append('}'); } -simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { - return numberparsing::get_number_type(peek_non_root_scalar("integer")); + +// works for container that have begin() and end() iterators +template + requires(concepts::container_but_not_string && !require_custom_serialization) +void append(string_builder &b, const Z &z) { + auto it = z.begin(); + auto end = z.end(); + if (it == end) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, *it); + ++it; + for (; it != end; ++it) { + b.append(','); + atom(b, *it); + } + b.append(']'); } -simdjson_inline simdjson_result value_iterator::get_number() noexcept { - number num; - error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); - if(error) { return error; } - return num; + +template + requires (require_custom_serialization) +void append(string_builder &b, const Z &z) { + b.append(z); } -simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("is_root_integer"); - uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - return false; // if there are more than 20 characters, it cannot be represented as an integer. - } - auto answer = numberparsing::is_integer(tmpbuf); - // If the parsing was a success, we must still check that it is - // a single scalar. Note that we parse first because of cases like '[]' where - // getting TRAILING_CONTENT is wrong. - if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } - return answer; + +template +simdjson_warn_unused simdjson_result to_json_string(const Z &z, size_t initial_capacity = string_builder::DEFAULT_INITIAL_CAPACITY) { + string_builder b(initial_capacity); + append(b, z); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); } -simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("number"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - uint8_t tmpbuf[1074+8+1+1]; - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - if(numberparsing::check_if_integer(json, max_len)) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); - return number_type::big_integer; - } - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); - return NUMBER_ERROR; - } - auto answer = numberparsing::get_number_type(tmpbuf); - if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - return answer; +template +simdjson_warn_unused simdjson_error to_json(const Z &z, std::string &s, size_t initial_capacity = string_builder::DEFAULT_INITIAL_CAPACITY) { + string_builder b(initial_capacity); + append(b, z); + std::string_view view; + if(auto e = b.view().get(view); e) { return e; } + s.assign(view); + return SUCCESS; } -simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("number"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - // NOTE: the current approach doesn't work for very big integer numbers containing more than 1074 digits. - uint8_t tmpbuf[1074+8+1+1]; - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - if(numberparsing::check_if_integer(json, max_len)) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); - return BIGINT_ERROR; + +template +string_builder& operator<<(string_builder& b, const Z& z) { + append(b, z); + return b; +} + +// extract_from: Serialize only specific fields from a struct to JSON +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +void extract_from(string_builder &b, const T &obj) { + // Helper to check if a field name matches any of the requested fields + auto should_extract = [](std::string_view field_name) constexpr -> bool { + return ((FieldNames.view() == field_name) || ...); + }; + + b.append('{'); + bool first = true; + + // Iterate through all members of T using reflection + template for (constexpr auto mem : std::define_static_array( + std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + + if constexpr (std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + + // Only serialize this field if it's in our list of requested fields + if constexpr (should_extract(key)) { + if (!first) { + b.append(','); + } + first = false; + + // Serialize the key + constexpr auto quoted_key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(mem))); + b.append_raw(quoted_key); + b.append(':'); + + // Serialize the value + atom(b, obj.[:mem:]); + } } - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); - return NUMBER_ERROR; - } - number num; - error_code error = numberparsing::parse_number(tmpbuf, num); - if(error) { return error; } - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("number"); - return num; + }; + + b.append('}'); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { - return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); + +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_result extract_from(const T &obj, size_t initial_capacity = string_builder::DEFAULT_INITIAL_CAPACITY) { + string_builder b(initial_capacity); + extract_from(b, obj); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); } -template -simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept { - std::string_view content; - auto err = get_root_string(check_trailing, allow_replacement).get(content); - if (err) { return err; } - receiver = content; + +} // namespace builder +} // namespace westmere +// Alias the function template to 'to' in the global namespace +template +simdjson_warn_unused simdjson_result to_json(const Z &z, size_t initial_capacity = westmere::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + westmere::builder::string_builder b(initial_capacity); + westmere::builder::append(b, z); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); +} +template +simdjson_warn_unused simdjson_error to_json(const Z &z, std::string &s, size_t initial_capacity = westmere::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + westmere::builder::string_builder b(initial_capacity); + westmere::builder::append(b, z); + std::string_view view; + if(auto e = b.view().get(view); e) { return e; } + s.assign(view); return SUCCESS; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { - return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); +// Global namespace function for extract_from +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_result extract_from(const T &obj, size_t initial_capacity = westmere::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + westmere::builder::string_builder b(initial_capacity); + westmere::builder::extract_from(b, obj); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { - auto json = peek_scalar("string"); - if (*json != '"') { return incorrect_type_error("Not a string"); } - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_scalar("string"); - return raw_json_string(json+1); + +} // namespace simdjson + +#endif // SIMDJSON_STATIC_REFLECTION + +#endif +/* end file simdjson/generic/ondemand/json_builder.h for westmere */ + +// JSON path accessor (compile-time) - must be after inline definitions +/* including simdjson/generic/ondemand/compile_time_accessors.h for westmere: #include "simdjson/generic/ondemand/compile_time_accessors.h" */ +/* begin file simdjson/generic/ondemand/compile_time_accessors.h for westmere */ +/** + * Compile-time JSON Path and JSON Pointer accessors using C++26 reflection (P2996) + * + * This file validates JSON paths/pointers against struct definitions at compile time + * and generates optimized accessor code with zero runtime overhead. + * + * ## How It Works + * + * **Compile Time**: Path is parsed, validated against struct, types are checked + * **Runtime**: Direct navigation with no parsing or validation overhead + * + * Example: + * ```cpp + * struct User { std::string name; std::vector emails; }; + * + * std::string email; + * path_accessor::extract_field(doc, email); + * + * // Compile time validates: + * // 1. User has "emails" field + * // 2. "emails" is array-like + * // 3. Element type is std::string + * // 4. static_assert(^^std::string == ^^std::string) + * + * // Runtime just navigates: + * // doc.get_object().find_field("emails").get_array().at(0).get(email) + * ``` + * + * ## Key Reflection APIs + * + * - `^^Type`: Reflect operator, converts type to std::meta::info + * - `std::meta::nonstatic_data_members_of(type)`: Get all fields of a struct + * - `std::meta::identifier_of(member)`: Get field name as string_view + * - `std::meta::type_of(member)`: Get reflected type of a field + * - `std::meta::is_array_type(type)`: Check if C-style array + * - `std::meta::remove_extent(array)`: Extract element type from array + * - `std::meta::members_of(type)`: Get all members including typedefs + * - `std::meta::is_type(member)`: Check if member is a type (vs field) + * + * All operations execute at compile time in consteval contexts. + */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_COMPILE_TIME_ACCESSORS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_COMPILE_TIME_ACCESSORS_H */ +/* amalgamation skipped (editor-only): */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// Arguably, we should just check SIMDJSON_STATIC_REFLECTION since it +// is unlikely that we will have reflection support without concepts support. +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + +#include +#include +#include + +namespace simdjson { +namespace westmere { +namespace ondemand { +/*** + * JSONPath implementation for compile-time access + * RFC 9535 JSONPath: Query Expressions for JSON, https://www.rfc-editor.org/rfc/rfc9535 + */ +namespace json_path { + +// Note: value type must be fully defined before this header is included +// This is ensured by including this in amalgamated.h after value-inl.h + +using ::simdjson::westmere::ondemand::value; + +// Path step types +enum class step_type { + field, // .field_name or ["field_name"] + array_index // [index] +}; + +// Represents a single step in a JSON path +template +struct path_step { + step_type type; + char key[N]; // Field name (empty for array indices) + std::size_t index; // Array index (0 for field access) + + constexpr path_step(step_type t, const char (&k)[N], std::size_t idx = 0) + : type(t), index(idx) { + for (std::size_t i = 0; i < N; ++i) { + key[i] = k[i]; + } + } + + constexpr std::string_view key_view() const { + return {key, N - 1}; + } +}; + +// Helper to create field step +template +consteval auto make_field_step(const char (&name)[N]) { + return path_step(step_type::field, name, 0); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("uint64"); - uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; + +// Helper to create array index step +consteval auto make_index_step(std::size_t idx) { + return path_step<1>(step_type::array_index, "", idx); +} + +// Parse state for compile-time JSON path parsing +struct parse_result { + bool success; + std::size_t pos; + std::string_view error_msg; +}; + +// Compile-time JSON path parser +// Supports subset: .field, ["field"], [index], nested combinations +template +struct json_path_parser { + static constexpr std::string_view path_str = Path.view(); + + // Skip leading $ if present + static consteval std::size_t skip_root() { + if (!path_str.empty() && path_str[0] == '$') { + return 1; + } + return 0; } - auto result = numberparsing::parse_unsigned(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("uint64"); + + // Count the number of steps in the path at compile time + static consteval std::size_t count_steps() { + std::size_t count = 0; + std::size_t i = skip_root(); + + while (i < path_str.size()) { + if (path_str[i] == '.') { + // Field access: .field + ++i; + if (i >= path_str.size()) break; + + // Skip field name + while (i < path_str.size() && path_str[i] != '.' && path_str[i] != '[') { + ++i; + } + ++count; + } else if (path_str[i] == '[') { + // Array or bracket notation + ++i; + if (i >= path_str.size()) break; + + if (path_str[i] == '"' || path_str[i] == '\'') { + // Field access: ["field"] or ['field'] + char quote = path_str[i]; + ++i; + while (i < path_str.size() && path_str[i] != quote) { + ++i; + } + if (i < path_str.size()) ++i; // skip closing quote + if (i < path_str.size() && path_str[i] == ']') ++i; + } else { + // Array index: [0], [123] + while (i < path_str.size() && path_str[i] != ']') { + ++i; + } + if (i < path_str.size()) ++i; // skip ] + } + ++count; + } else { + ++i; + } + } + + return count; + } + + // Parse a field name at compile time + static consteval std::size_t parse_field_name(std::size_t start, char* out, std::size_t max_len) { + std::size_t len = 0; + std::size_t i = start; + + while (i < path_str.size() && path_str[i] != '.' && path_str[i] != '[' && len < max_len - 1) { + out[len++] = path_str[i++]; + } + out[len] = '\0'; + return i; + } + + // Parse an array index at compile time + static consteval std::pair parse_array_index(std::size_t start) { + std::size_t index = 0; + std::size_t i = start; + + while (i < path_str.size() && path_str[i] >= '0' && path_str[i] <= '9') { + index = index * 10 + (path_str[i] - '0'); + ++i; + } + + return {i, index}; + } +}; + +// Compile-time path accessor generator +template +struct path_accessor { + using value = ::simdjson::westmere::ondemand::value; + + static constexpr auto parser = json_path_parser(); + static constexpr std::size_t num_steps = parser.count_steps(); + static constexpr std::string_view path_view = Path.view(); + + // Compile-time accessor generation + // If T is a struct, validates the path at compile time + // If T is void, skips validation + template + static inline simdjson_result access(DocOrValue& doc_or_val) noexcept { + // Validate path at compile time if T is a struct + if constexpr (std::is_class_v) { + constexpr bool path_valid = validate_path(); + static_assert(path_valid, "JSON path does not match struct definition"); + } + + // Parse the path at compile time to build access steps + return access_impl(doc_or_val.get_value()); + } + + // Extract value at path directly into target with compile-time type validation + // Example: std::string name; path_accessor::extract_field(doc, name); + template + static inline error_code extract_field(DocOrValue& doc_or_val, FieldType& target) noexcept { + static_assert(std::is_class_v, "extract_field requires T to be a struct type for validation"); + + // Validate path exists in struct definition + constexpr bool path_valid = validate_path(); + static_assert(path_valid, "JSON path does not match struct definition"); + + // Get the type at the end of the path + constexpr auto final_type = get_final_type(); + + // Verify target type matches the field type + static_assert(final_type == ^^FieldType, "Target type does not match the field type at the path"); + + // All validation done at compile time - just navigate and extract + auto json_value = access_impl(doc_or_val.get_value()); + if (json_value.error()) return json_value.error(); + + return json_value.get(target); + } + +private: + // Get the final type by walking the path through the struct type + template + static consteval std::enable_if_t, std::meta::info> get_final_type() { + auto current_type = ^^T; + std::size_t i = parser.skip_root(); + + while (i < path_view.size()) { + if (path_view[i] == '.') { + // .field syntax + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != '.' && path_view[i] != '[') { + ++i; + } + + std::string_view field_name = path_view.substr(field_start, i - field_start); + + auto members = std::meta::nonstatic_data_members_of( + current_type, std::meta::access_context::unchecked() + ); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + break; + } + } + + } else if (path_view[i] == '[') { + ++i; + if (i >= path_view.size()) break; + + if (path_view[i] == '"' || path_view[i] == '\'') { + // ["field"] syntax + char quote = path_view[i]; + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != quote) { + ++i; + } + + std::string_view field_name = path_view.substr(field_start, i - field_start); + if (i < path_view.size()) ++i; // skip quote + if (i < path_view.size() && path_view[i] == ']') ++i; + + auto members = std::meta::nonstatic_data_members_of( + current_type, std::meta::access_context::unchecked() + ); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + break; + } + } + + } else { + // [index] syntax - extract element type + while (i < path_view.size() && path_view[i] >= '0' && path_view[i] <= '9') { + ++i; + } + if (i < path_view.size() && path_view[i] == ']') ++i; + + current_type = get_element_type_reflected(current_type); + } + } else { + ++i; + } + } + + return current_type; } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("uint64"); - uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; + +private: + // Walk path and extract directly into final field using compile-time reflection + template + static inline error_code extract_with_reflection(simdjson_result current, TargetType& target_ref) noexcept { + if (current.error()) return current.error(); + + // Base case: end of path - extract into target + if constexpr (PathPos >= path_view.size()) { + return current.get(target_ref); + } + // Field access: .field_name + else if constexpr (path_view[PathPos] == '.') { + constexpr auto field_info = parse_next_field(PathPos); + constexpr std::string_view field_name = std::get<0>(field_info); + constexpr std::size_t next_pos = std::get<1>(field_info); + + constexpr auto member_info = find_member_by_name(CurrentType, field_name); + static_assert(member_info != ^^void, "Field not found in struct"); + + constexpr auto member_type = std::meta::type_of(member_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + auto obj = obj_result.value_unsafe(); + auto field_value = obj.find_field_unordered(field_name); + + if constexpr (next_pos >= path_view.size()) { + return field_value.get(target_ref); + } else { + return extract_with_reflection(field_value, target_ref); + } + } + // Bracket notation: [index] or ["field"] + else if constexpr (path_view[PathPos] == '[') { + constexpr auto bracket_info = parse_bracket(PathPos); + constexpr bool is_field = std::get<0>(bracket_info); + constexpr std::size_t next_pos = std::get<2>(bracket_info); + + if constexpr (is_field) { + constexpr std::string_view field_name = std::get<1>(bracket_info); + constexpr auto member_info = find_member_by_name(CurrentType, field_name); + static_assert(member_info != ^^void, "Field not found in struct"); + constexpr auto member_type = std::meta::type_of(member_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + auto obj = obj_result.value_unsafe(); + auto field_value = obj.find_field_unordered(field_name); + + if constexpr (next_pos >= path_view.size()) { + return field_value.get(target_ref); + } else { + return extract_with_reflection(field_value, target_ref); + } + } else { + constexpr std::size_t index = std::get<3>(bracket_info); + constexpr auto elem_type = get_element_type_reflected(CurrentType); + static_assert(elem_type != ^^void, "Could not determine array element type"); + + auto arr_result = current.get_array(); + if (arr_result.error()) return arr_result.error(); + auto arr = arr_result.value_unsafe(); + auto elem_value = arr.at(index); + + if constexpr (next_pos >= path_view.size()) { + return elem_value.get(target_ref); + } else { + return extract_with_reflection(elem_value, target_ref); + } + } + } + // Skip unexpected characters and continue + else { + return extract_with_reflection(current, target_ref); + } } - auto result = numberparsing::parse_unsigned_in_string(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("uint64"); + + // Find member by name in reflected type + static consteval std::meta::info find_member_by_name(std::meta::info type_refl, std::string_view name) { + auto members = std::meta::nonstatic_data_members_of(type_refl, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::identifier_of(mem) == name) { + return mem; + } + } } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("int64"); - uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; + + // Generate compile-time accessor code by walking the path + template + static inline simdjson_result access_impl(simdjson_result current) noexcept { + if (current.error()) return current; + + if constexpr (PathPos >= path_view.size()) { + return current; + } else if constexpr (path_view[PathPos] == '.') { + constexpr auto field_info = parse_next_field(PathPos); + constexpr std::string_view field_name = std::get<0>(field_info); + constexpr std::size_t next_pos = std::get<1>(field_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + + auto obj = obj_result.value_unsafe(); + auto next_value = obj.find_field_unordered(field_name); + + return access_impl(next_value); + + } else if constexpr (path_view[PathPos] == '[') { + constexpr auto bracket_info = parse_bracket(PathPos); + constexpr bool is_field = std::get<0>(bracket_info); + constexpr std::size_t next_pos = std::get<2>(bracket_info); + + if constexpr (is_field) { + constexpr std::string_view field_name = std::get<1>(bracket_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + + auto obj = obj_result.value_unsafe(); + auto next_value = obj.find_field_unordered(field_name); + + return access_impl(next_value); + + } else { + constexpr std::size_t index = std::get<3>(bracket_info); + + auto arr_result = current.get_array(); + if (arr_result.error()) return arr_result.error(); + + auto arr = arr_result.value_unsafe(); + auto next_value = arr.at(index); + + return access_impl(next_value); + } + } else { + return access_impl(current); + } } - auto result = numberparsing::parse_integer(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("int64"); + // Parse next field name + static consteval auto parse_next_field(std::size_t start) { + std::size_t i = start + 1; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != '.' && path_view[i] != '[') { + ++i; + } + std::string_view field_name = path_view.substr(field_start, i - field_start); + return std::make_tuple(field_name, i); } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("int64"); - uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; + + // Parse bracket notation: returns (is_field, field_name, next_pos, index) + static consteval auto parse_bracket(std::size_t start) { + std::size_t i = start + 1; // skip '[' + + if (i < path_view.size() && (path_view[i] == '"' || path_view[i] == '\'')) { + // Field access + char quote = path_view[i]; + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != quote) { + ++i; + } + std::string_view field_name = path_view.substr(field_start, i - field_start); + if (i < path_view.size()) ++i; // skip closing quote + if (i < path_view.size() && path_view[i] == ']') ++i; + + return std::make_tuple(true, field_name, i, std::size_t(0)); + } else { + // Array index + std::size_t index = 0; + while (i < path_view.size() && path_view[i] >= '0' && path_view[i] <= '9') { + index = index * 10 + (path_view[i] - '0'); + ++i; + } + if (i < path_view.size() && path_view[i] == ']') ++i; + + return std::make_tuple(false, std::string_view{}, i, index); + } } - auto result = numberparsing::parse_integer_in_string(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("int64"); +public: + // Check if reflected type is array-like (C-style array or indexable container) + // Uses reflection to test: 1) std::meta::is_array_type() for C arrays + // 2) std::meta::substitute() to test concepts::indexable_container concept + static consteval bool is_array_like_reflected(std::meta::info type_reflection) { + if (std::meta::is_array_type(type_reflection)) { + return true; + } + + if (std::meta::can_substitute(^^concepts::indexable_container_v, {type_reflection})) { + return std::meta::extract(std::meta::substitute(^^concepts::indexable_container_v, {type_reflection})); + } + return false; } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("double"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); - return NUMBER_ERROR; + + // Extract element type from reflected array or container + // For C arrays: uses std::meta::remove_extent() + // For containers: finds value_type member using std::meta::members_of() + static consteval std::meta::info get_element_type_reflected(std::meta::info type_reflection) { + if (std::meta::is_array_type(type_reflection)) { + return std::meta::remove_extent(type_reflection); + } + + auto members = std::meta::members_of(type_reflection, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::is_type(mem)) { + auto name = std::meta::identifier_of(mem); + if (name == "value_type") { + return mem; + } + } + } + return ^^void; } - auto result = numberparsing::parse_double(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("double"); + +private: + // Check if type has member with given name + template + static consteval bool has_member(std::string_view member_name) { + constexpr auto members = std::meta::nonstatic_data_members_of(^^Type, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::identifier_of(mem) == member_name) { + return true; + } + } + return false; } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("double"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); - return NUMBER_ERROR; + // Get type of member by name + template + static consteval auto get_member_type(std::string_view member_name) { + constexpr auto members = std::meta::nonstatic_data_members_of(^^Type, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::identifier_of(mem) == member_name) { + return std::meta::type_of(mem); + } + } + return ^^void; } - auto result = numberparsing::parse_double_in_string(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("double"); + + // Check if non-reflected type is array-like + template + static consteval bool is_container_type() { + using BaseType = std::remove_cvref_t; + if constexpr (requires { typename BaseType::value_type; }) { + return true; + } + if constexpr (std::is_array_v) { + return true; + } + return false; } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("bool"); - // We have a boolean if we have either "true" or "false" and the next character is either - // a structural character or whitespace. We also check that the length is correct: - // "true" and "false" are 4 and 5 characters long, respectively. - bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && - (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); - bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && - (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); - if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("bool"); - return value_true; -} + // Extract element type from non-reflected container + template + using extract_element_type = std::conditional_t< + requires { typename std::remove_cvref_t::value_type; }, + typename std::remove_cvref_t::value_type, + std::conditional_t< + std::is_array_v>, + std::remove_extent_t>, + void + > + >; + + // Validate path matches struct definition + static consteval bool validate_path() { + if constexpr (!std::is_class_v) { + return true; + } -simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("null"); - bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && - (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); - if(result) { // we have something that looks like a null. - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("null"); - } else if (json[0] == 'n') { - return incorrect_type_error("Not a null but starts with n"); - } - return result; -} + auto current_type = ^^T; + std::size_t i = parser.skip_root(); -simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); + while (i < path_view.size()) { + if (path_view[i] == '.') { + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != '.' && path_view[i] != '[') { + ++i; + } - return _json_iter->skip_child(depth()); -} + std::string_view field_name = path_view.substr(field_start, i - field_start); -simdjson_inline value_iterator value_iterator::child() const noexcept { - assert_at_child(); - return { _json_iter, depth()+1, _json_iter->token.position() }; -} + bool found = false; + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); -// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller -// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is -// marked non-inline. -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_inline bool value_iterator::is_open() const noexcept { - return _json_iter->depth() >= depth(); -} -SIMDJSON_POP_DISABLE_WARNINGS + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + found = true; + break; + } + } -simdjson_inline bool value_iterator::at_end() const noexcept { - return _json_iter->at_end(); -} + if (!found) { + return false; + } -simdjson_inline bool value_iterator::at_start() const noexcept { - return _json_iter->token.position() == start_position(); -} + } else if (path_view[i] == '[') { + ++i; + if (i >= path_view.size()) return false; -simdjson_inline bool value_iterator::at_first_field() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - return _json_iter->token.position() == start_position() + 1; -} + if (path_view[i] == '"' || path_view[i] == '\'') { + char quote = path_view[i]; + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != quote) { + ++i; + } -simdjson_inline void value_iterator::abandon() noexcept { - _json_iter->abandon(); -} + std::string_view field_name = path_view.substr(field_start, i - field_start); + if (i < path_view.size()) ++i; + if (i < path_view.size() && path_view[i] == ']') ++i; -simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { - return _depth; -} -simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { - return _json_iter->error; -} -simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { - return _json_iter->string_buf_loc(); -} -simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { - return *_json_iter; -} -simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { - return *_json_iter; -} + bool found = false; + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); -simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { - return _json_iter->peek(start_position()); -} -simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { - return _json_iter->peek_length(start_position()); + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + found = true; + break; + } + } + + if (!found) { + return false; + } + + } else { + while (i < path_view.size() && path_view[i] >= '0' && path_view[i] <= '9') { + ++i; + } + + if (i < path_view.size() && path_view[i] == ']') ++i; + + if (!is_array_like_reflected(current_type)) { + return false; + } + + auto new_type = get_element_type_reflected(current_type); + + if (new_type == ^^void) { + return false; + } + + current_type = new_type; + } + } else { + ++i; + } + } + + return true; + } +}; + +// Compile-time path accessor with validation +template +inline simdjson_result<::simdjson::westmere::ondemand::value> at_path_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = path_accessor; + return accessor::access(doc_or_val); } -simdjson_inline uint32_t value_iterator::peek_root_length() const noexcept { - return _json_iter->peek_root_length(start_position()); + +// Overload without type parameter (no validation) +template +inline simdjson_result<::simdjson::westmere::ondemand::value> at_path_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = path_accessor; + return accessor::access(doc_or_val); } -simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - // If we're not at the position anymore, we don't want to advance the cursor. - if (!is_at_start()) { return peek_start(); } +// ============================================================================ +// JSON Pointer Compile-Time Support (RFC 6901) +// ============================================================================ - // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. - assert_at_start(); - return _json_iter->peek(); -} +// JSON Pointer parser: /field/0/nested (slash-separated) +template +struct json_pointer_parser { + static constexpr std::string_view pointer_str = Pointer.view(); -simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - // If we're not at the position anymore, we don't want to advance the cursor. - if (!is_at_start()) { return; } + // Unescape token: ~0 -> ~, ~1 -> / + static consteval void unescape_token(std::string_view src, char* dest, std::size_t& out_len) { + out_len = 0; + for (std::size_t i = 0; i < src.size(); ++i) { + if (src[i] == '~' && i + 1 < src.size()) { + if (src[i + 1] == '0') { + dest[out_len++] = '~'; + ++i; + } else if (src[i + 1] == '1') { + dest[out_len++] = '/'; + ++i; + } else { + dest[out_len++] = src[i]; + } + } else { + dest[out_len++] = src[i]; + } + } + } - // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. - assert_at_start(); - _json_iter->return_current_and_advance(); - _json_iter->ascend_to(depth()-1); -} + // Check if token is numeric + static consteval bool is_numeric(std::string_view token) { + if (token.empty()) return false; + if (token[0] == '0' && token.size() > 1) return false; + for (char c : token) { + if (c < '0' || c > '9') return false; + } + return true; + } -simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { - logger::log_start_value(*_json_iter, start_position(), depth(), type); - // If we're not at the position anymore, we don't want to advance the cursor. - const uint8_t *json; - if (!is_at_start()) { -#if SIMDJSON_DEVELOPMENT_CHECKS - if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } -#endif - json = peek_start(); - if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } - } else { - assert_at_start(); - /** - * We should be prudent. Let us peek. If it is not the right type, we - * return an error. Only once we have determined that we have the right - * type are we allowed to advance! - */ - json = _json_iter->peek(); - if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } - _json_iter->return_current_and_advance(); + // Parse numeric token to index + static consteval std::size_t parse_index(std::string_view token) { + std::size_t result = 0; + for (char c : token) { + result = result * 10 + (c - '0'); + } + return result; } + // Count tokens in pointer + static consteval std::size_t count_tokens() { + if (pointer_str.empty() || pointer_str == "/") return 0; - return SUCCESS; -} + std::size_t count = 0; + std::size_t pos = pointer_str[0] == '/' ? 1 : 0; + while (pos < pointer_str.size()) { + ++count; + std::size_t next_slash = pointer_str.find('/', pos); + if (next_slash == std::string_view::npos) break; + pos = next_slash + 1; + } -simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return peek_start(); } + return count; + } - assert_at_root(); - return _json_iter->peek(); -} -simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return peek_start(); } + // Get Nth token + static consteval std::string_view get_token(std::size_t token_index) { + std::size_t pos = pointer_str[0] == '/' ? 1 : 0; + std::size_t current_token = 0; - assert_at_non_root_start(); - return _json_iter->peek(); -} + while (current_token < token_index) { + std::size_t next_slash = pointer_str.find('/', pos); + pos = next_slash + 1; + ++current_token; + } -simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return; } + std::size_t token_end = pointer_str.find('/', pos); + if (token_end == std::string_view::npos) token_end = pointer_str.size(); - assert_at_root(); - _json_iter->return_current_and_advance(); - _json_iter->ascend_to(depth()-1); -} -simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return; } + return pointer_str.substr(pos, token_end - pos); + } +}; - assert_at_non_root_start(); - _json_iter->return_current_and_advance(); - _json_iter->ascend_to(depth()-1); -} +// JSON Pointer accessor +template +struct pointer_accessor { + using parser = json_pointer_parser; + static constexpr std::string_view pointer_view = Pointer.view(); + static constexpr std::size_t token_count = parser::count_tokens(); -simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { - logger::log_error(*_json_iter, start_position(), depth(), message); - return INCORRECT_TYPE; -} + // Validate pointer against struct definition + static consteval bool validate_pointer() { + if constexpr (!std::is_class_v) { + return true; + } -simdjson_inline bool value_iterator::is_at_start() const noexcept { - return position() == start_position(); -} + auto current_type = ^^T; + std::size_t pos = pointer_view[0] == '/' ? 1 : 0; -simdjson_inline bool value_iterator::is_at_key() const noexcept { - // Keys are at the same depth as the object. - // Note here that we could be safer and check that we are within an object, - // but we do not. - return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; -} + while (pos < pointer_view.size()) { + // Extract token up to next / + std::size_t token_end = pointer_view.find('/', pos); + if (token_end == std::string_view::npos) token_end = pointer_view.size(); -simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { - // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). - auto delta = position() - start_position(); - return delta == 1 || delta == 2; -} + std::string_view token = pointer_view.substr(pos, token_end - pos); -inline void value_iterator::assert_at_start() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); - SIMDJSON_ASSUME( _depth > 0 ); -} + if (parser::is_numeric(token)) { + if (!path_accessor::is_array_like_reflected(current_type)) { + return false; + } + current_type = path_accessor::get_element_type_reflected(current_type); + } else { + bool found = false; + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == token) { + current_type = std::meta::type_of(mem); + found = true; + break; + } + } -inline void value_iterator::assert_at_container_start() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); - SIMDJSON_ASSUME( _depth > 0 ); -} + if (!found) return false; + } -inline void value_iterator::assert_at_next() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); - SIMDJSON_ASSUME( _depth > 0 ); -} + pos = token_end + 1; + } -simdjson_inline void value_iterator::move_at_start() noexcept { - _json_iter->_depth = _depth; - _json_iter->token.set_position(_start_position); -} + return true; + } -simdjson_inline void value_iterator::move_at_container_start() noexcept { - _json_iter->_depth = _depth; - _json_iter->token.set_position(_start_position + 1); -} + // Recursive accessor + template + static inline simdjson_result access_impl(simdjson_result current) noexcept { + if constexpr (TokenIndex >= token_count) { + return current; + } else { + constexpr std::string_view token = parser::get_token(TokenIndex); -simdjson_inline simdjson_result value_iterator::reset_array() noexcept { - if(error()) { return error(); } - move_at_container_start(); - return started_array(); -} + if constexpr (parser::is_numeric(token)) { + constexpr std::size_t index = parser::parse_index(token); + auto arr = current.get_array().value_unsafe(); + auto next_value = arr.at(index); + return access_impl(next_value); + } else { + auto obj = current.get_object().value_unsafe(); + auto next_value = obj.find_field_unordered(token); + return access_impl(next_value); + } + } + } -simdjson_inline simdjson_result value_iterator::reset_object() noexcept { - if(error()) { return error(); } - move_at_container_start(); - return started_object(); -} + // Access JSON value at pointer + template + static inline simdjson_result access(DocOrValue& doc_or_val) noexcept { + if constexpr (std::is_class_v) { + constexpr bool pointer_valid = validate_pointer(); + static_assert(pointer_valid, "JSON Pointer does not match struct definition"); + } -inline void value_iterator::assert_at_child() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); - SIMDJSON_ASSUME( _depth > 0 ); -} + if (pointer_view.empty() || pointer_view == "/") { + if constexpr (requires { doc_or_val.get_value(); }) { + return doc_or_val.get_value(); + } else { + return doc_or_val; + } + } -inline void value_iterator::assert_at_root() const noexcept { - assert_at_start(); - SIMDJSON_ASSUME( _depth == 1 ); -} + simdjson_result current = doc_or_val.get_value(); + return access_impl<0>(current); + } -inline void value_iterator::assert_at_non_root_start() const noexcept { - assert_at_start(); - SIMDJSON_ASSUME( _depth > 1 ); -} + // Extract value at pointer directly into target with type validation + template + static inline error_code extract_field(DocOrValue& doc_or_val, FieldType& target) noexcept { + static_assert(std::is_class_v, "extract_field requires T to be a struct type for validation"); -inline void value_iterator::assert_is_valid() const noexcept { - SIMDJSON_ASSUME( _json_iter != nullptr ); -} + constexpr bool pointer_valid = validate_pointer(); + static_assert(pointer_valid, "JSON Pointer does not match struct definition"); -simdjson_inline bool value_iterator::is_valid() const noexcept { - return _json_iter != nullptr; -} + constexpr auto final_type = get_final_type(); + static_assert(final_type == ^^FieldType, "Target type does not match the field type at the pointer"); -simdjson_inline simdjson_result value_iterator::type() const noexcept { - switch (*peek_start()) { - case '{': - return json_type::object; - case '[': - return json_type::array; - case '"': - return json_type::string; - case 'n': - return json_type::null; - case 't': case 'f': - return json_type::boolean; - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return json_type::number; - default: - return TAPE_ERROR; + simdjson_result current_value = doc_or_val.get_value(); + auto json_value = access_impl<0>(current_value); + if (json_value.error()) return json_value.error(); + + return json_value.get(target); } -} -simdjson_inline token_position value_iterator::start_position() const noexcept { - return _start_position; -} +private: + // Get final type by walking pointer through struct + template + static consteval std::enable_if_t, std::meta::info> get_final_type() { + auto current_type = ^^T; + std::size_t pos = pointer_view[0] == '/' ? 1 : 0; -simdjson_inline token_position value_iterator::position() const noexcept { - return _json_iter->position(); -} + while (pos < pointer_view.size()) { + std::size_t token_end = pointer_view.find('/', pos); + if (token_end == std::string_view::npos) token_end = pointer_view.size(); -simdjson_inline token_position value_iterator::end_position() const noexcept { - return _json_iter->end_position(); -} + std::string_view token = pointer_view.substr(pos, token_end - pos); -simdjson_inline token_position value_iterator::last_position() const noexcept { - return _json_iter->last_position(); + if (parser::is_numeric(token)) { + current_type = path_accessor::get_element_type_reflected(current_type); + } else { + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == token) { + current_type = std::meta::type_of(mem); + break; + } + } + } + + pos = token_end + 1; + } + + return current_type; + } +}; + +// Compile-time JSON Pointer accessor with validation +template +inline simdjson_result<::simdjson::westmere::ondemand::value> at_pointer_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = pointer_accessor; + return accessor::access(doc_or_val); } -simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { - return _json_iter->report_error(error, message); +// Overload without type parameter (no validation) +template +inline simdjson_result<::simdjson::westmere::ondemand::value> at_pointer_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = pointer_accessor; + return accessor::access(doc_or_val); } +} // namespace json_path } // namespace ondemand } // namespace westmere } // namespace simdjson -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::value_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/value_iterator-inl.h for westmere */ +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION +#endif // SIMDJSON_GENERIC_ONDEMAND_COMPILE_TIME_ACCESSORS_H +/* end file simdjson/generic/ondemand/compile_time_accessors.h for westmere */ /* end file simdjson/generic/ondemand/amalgamated.h for westmere */ /* including simdjson/westmere/end.h: #include "simdjson/westmere/end.h" */ @@ -101020,7 +123967,7 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return bs_bits != 0; } @@ -101049,6 +123996,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + static_cast((is_backslash | is_quote | is_control).to_bitmask()) + }; +} + } // unnamed namespace } // namespace lsx } // namespace simdjson @@ -101117,7 +124089,7 @@ class value_iterator; /* end file simdjson/generic/ondemand/base.h for lsx */ /* including simdjson/generic/ondemand/deserialize.h for lsx: #include "simdjson/generic/ondemand/deserialize.h" */ /* begin file simdjson/generic/ondemand/deserialize.h for lsx */ -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS #ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -101126,55 +124098,8 @@ class value_iterator; /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include namespace simdjson { -namespace tag_invoke_fn_ns { -void tag_invoke(); - -struct tag_invoke_fn { - template - requires requires(Tag tag, Args &&...args) { - tag_invoke(std::forward(tag), std::forward(args)...); - } - constexpr auto operator()(Tag tag, Args &&...args) const - noexcept(noexcept(tag_invoke(std::forward(tag), - std::forward(args)...))) - -> decltype(tag_invoke(std::forward(tag), - std::forward(args)...)) { - return tag_invoke(std::forward(tag), std::forward(args)...); - } -}; -} // namespace tag_invoke_fn_ns - -inline namespace tag_invoke_ns { -inline constexpr tag_invoke_fn_ns::tag_invoke_fn tag_invoke = {}; -} // namespace tag_invoke_ns - -template -concept tag_invocable = requires(Tag tag, Args... args) { - tag_invoke(std::forward(tag), std::forward(args)...); -}; - -template -concept nothrow_tag_invocable = - tag_invocable && requires(Tag tag, Args... args) { - { - tag_invoke(std::forward(tag), std::forward(args)...) - } noexcept; - }; - -template -using tag_invoke_result = - std::invoke_result; - -template -using tag_invoke_result_t = - std::invoke_result_t; - -template using tag_t = std::decay_t; - - struct deserialize_tag; /// These types are deserializable in a built-in way @@ -101196,7 +124121,7 @@ template concept custom_deserializable = tag_invocable; template -concept deserializable = custom_deserializable || is_builtin_deserializable_v; +concept deserializable = custom_deserializable || is_builtin_deserializable_v || concepts::optional_type; template concept nothrow_custom_deserializable = nothrow_tag_invocable; @@ -101207,28 +124132,44 @@ concept nothrow_deserializable = nothrow_custom_deserializable || is_bu /// Deserialize Tag inline constexpr struct deserialize_tag { + using array_type = lsx::ondemand::array; + using object_type = lsx::ondemand::object; using value_type = lsx::ondemand::value; using document_type = lsx::ondemand::document; using document_reference_type = lsx::ondemand::document_reference; + // Customization Point for array + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(array_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for object + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(object_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + // Customization Point for value template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } // Customization Point for document template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } // Customization Point for document reference template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } @@ -101238,7 +124179,7 @@ inline constexpr struct deserialize_tag { } // namespace simdjson #endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS /* end file simdjson/generic/ondemand/deserialize.h for lsx */ /* including simdjson/generic/ondemand/value_iterator.h for lsx: #include "simdjson/generic/ondemand/value_iterator.h" */ @@ -101580,7 +124521,7 @@ class value_iterator { simdjson_warn_unused simdjson_inline simdjson_result get_root_number(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result is_root_null(bool check_trailing) noexcept; - simdjson_inline error_code error() const noexcept; + simdjson_warn_unused simdjson_inline error_code error() const noexcept; simdjson_inline uint8_t *&string_buf_loc() noexcept; simdjson_inline const json_iterator &json_iter() const noexcept; simdjson_inline json_iterator &json_iter() noexcept; @@ -101664,8 +124605,8 @@ class value_iterator { simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; - simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; - simdjson_inline error_code end_container() noexcept; + simdjson_warn_unused simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container() noexcept; /** * Advance to a place expecting a value (increasing depth). @@ -101675,8 +124616,8 @@ class value_iterator { */ simdjson_inline simdjson_result advance_to_value() noexcept; - simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; - simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; + simdjson_warn_unused simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; + simdjson_warn_unused simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; simdjson_inline bool is_at_start() const noexcept; /** @@ -101713,7 +124654,7 @@ class value_iterator { /** @copydoc error_code json_iterator::end_position() const noexcept; */ simdjson_inline token_position end_position() const noexcept; /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ - simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + simdjson_warn_unused simdjson_inline error_code report_error(error_code error, const char *message) noexcept; friend class document; friend class object; @@ -101750,6 +124691,7 @@ struct simdjson_result : public lsx::implementati /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include @@ -101778,13 +124720,14 @@ class value { * * You may use get_double(), get_bool(), get_uint64(), get_int64(), * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * When SIMDJSON_SUPPORTS_CONCEPTS is set, custom types are also supported. * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ template simdjson_inline simdjson_result get() -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -101801,22 +124744,38 @@ class value { * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * If the macro SIMDJSON_SUPPORTS_CONCEPTS is set, then custom types are also supported. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template - simdjson_inline error_code get(T &out) -#if SIMDJSON_SUPPORTS_DESERIALIZATION + simdjson_warn_unused simdjson_inline error_code get(T &out) +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { - #if SIMDJSON_SUPPORTS_DESERIALIZATION + #if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); + } else if constexpr (concepts::optional_type) { + using value_type = typename std::remove_cvref_t::value_type; + + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullopt + return SUCCESS; + } + + if (!out) { + out.emplace(); + } + return get(out.value()); } else { static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " "And you do not seem to have added support for it. Indeed, we have that " @@ -101826,7 +124785,7 @@ class value { static_cast(out); // to get rid of unused errors return UNINITIALIZED; } -#else // SIMDJSON_SUPPORTS_DESERIALIZATION +#else // SIMDJSON_SUPPORTS_CONCEPTS // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -101944,7 +124903,7 @@ class value { * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. */ template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; /** * Cast this JSON value to a "wobbly" string. @@ -102121,12 +125080,14 @@ class value { */ simdjson_inline simdjson_result at(size_t index) noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -102155,7 +125116,8 @@ class value { * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the default because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -102398,6 +125360,14 @@ class value { */ simdjson_inline simdjson_result at_path(std::string_view at_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * Supports wildcard character (*) for arrays or ".*" for objects. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern + */ + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; protected: /** @@ -102465,7 +125435,7 @@ struct simdjson_result : public lsx::implementation_simdjs simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -102494,12 +125464,14 @@ struct simdjson_result : public lsx::implementation_simdjs simdjson_inline simdjson_result end() & noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -102526,7 +125498,8 @@ struct simdjson_result : public lsx::implementation_simdjs * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the defaul because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -102546,7 +125519,22 @@ struct simdjson_result : public lsx::implementation_simdjs simdjson_result operator[](int) noexcept = delete; /** - * Get the type of this JSON value. + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * Given a valid JSON document, the answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just @@ -102570,6 +125558,7 @@ struct simdjson_result : public lsx::implementation_simdjs simdjson_inline simdjson_result current_depth() const noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; }; } // namespace simdjson @@ -103040,14 +126029,14 @@ class json_iterator { * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ - simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + simdjson_warn_unused simdjson_inline error_code report_error(error_code error, const char *message) noexcept; /** * Log error, but don't stop iteration. * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ - simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; + simdjson_warn_unused simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; /** * Take an input in json containing max_len characters and attempt to copy it over to tmpbuf, a buffer with @@ -103067,7 +126056,7 @@ class json_iterator { simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; - simdjson_inline error_code consume_character(char c) noexcept; + simdjson_warn_unused simdjson_inline error_code consume_character(char c) noexcept; #if SIMDJSON_DEVELOPMENT_CHECKS simdjson_inline token_position start_position(depth_t depth) const noexcept; simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; @@ -103158,6 +126147,7 @@ namespace ondemand { * The type of a JSON value. */ enum class json_type { + unknown=0, // Start at 1 to catch uninitialized / default values more easily array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) object, ///< A JSON object ( { "a": 1, "b" 2, ... } ) @@ -103364,6 +126354,12 @@ class raw_json_string { */ simdjson_inline const char * raw() const noexcept; + /** + * Get the character at index i. This is unchecked. + * [0] when the string is of length 0 returns the final quote ("). + */ + simdjson_inline char operator[](size_t i) const noexcept; + /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done) on target.size() characters, @@ -103503,10 +126499,10 @@ struct simdjson_result : public lsx::implementat simdjson_inline ~simdjson_result() noexcept = default; ///< @private simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline char operator[](size_t) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape(lsx::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(lsx::ondemand::json_iterator &iter) const noexcept; }; - } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H @@ -103522,6 +126518,7 @@ struct simdjson_result : public lsx::implementat /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include +#include namespace simdjson { namespace lsx { @@ -103640,7 +126637,9 @@ class parser { simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; - /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + /** @overload simdjson_result iterate(padded_string_view json) & noexcept + The string instance might be have its capacity extended. Note that this can still + result in AddressSanitizer: container-overflow in some cases. */ simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; @@ -103728,6 +126727,11 @@ class parser { * Setting batch_size to excessively large or excessively small values may impact negatively the * performance. * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what @@ -103735,10 +126739,10 @@ class parser { * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * - * ### Threads + * This is checked automatically with all iterate_many function calls, except for the two + * that take pointers (const char* or const uint8_t*). * - * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the - * hood to do some lookahead. + * ### Threads * * ### Parser Capacity * @@ -103764,14 +126768,16 @@ class parser { */ inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(padded_string_view json, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) + the string might be automatically padded with up to SIMDJSON_PADDING whitespace characters */ + inline simdjson_result iterate_many(std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe - /** @private We do not want to allow implicit conversion from C string to std::string. */ simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; @@ -103873,13 +126879,39 @@ class parser { bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; #endif + /** + * Get a unique parser instance corresponding to the current thread. + * This instance can be safely used within the current thread, but it should + * not be passed to other threads. + * + * A parser should only be used for one document at a time. + * + * Our simdjson::from functions use this parser instance. + * + * You can free the related parser by calling release_parser(). + */ + static simdjson_inline simdjson_warn_unused ondemand::parser& get_parser(); + /** + * Release the parser instance initialized by get_parser() and all the + * associated resources (memory). Returns true if a parser instance + * was released. + */ + static simdjson_inline bool release_parser(); + private: + friend bool release_parser(); + friend ondemand::parser& get_parser(); + /** Get the thread-local parser instance, allocates it if needed */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_parser_instance(); + /** Get the thread-local parser instance, it might be null */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_threadlocal_parser_if_exists(); /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; std::unique_ptr string_buf{}; + #if SIMDJSON_DEVELOPMENT_CHECKS std::unique_ptr start_positions{}; #endif @@ -103907,6 +126939,314 @@ struct simdjson_result : public lsx::implementation_simdj #endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H /* end file simdjson/generic/ondemand/parser.h for lsx */ +// JSON builder - needed for extract_into functionality +/* including simdjson/generic/ondemand/json_string_builder.h for lsx: #include "simdjson/generic/ondemand/json_string_builder.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder.h for lsx */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + + +#if SIMDJSON_SUPPORTS_CONCEPTS + +namespace lsx { +namespace builder { + class string_builder; +}} + +template +struct has_custom_serialization : std::false_type {}; + +inline constexpr struct serialize_tag { + template + constexpr void operator()(lsx::builder::string_builder& b, T&& obj) const{ + return tag_invoke(*this, b, std::forward(obj)); + } + + +} serialize{}; +template +struct has_custom_serialization(), std::declval())) +>> : std::true_type {}; + +template +constexpr bool require_custom_serialization = has_custom_serialization::value; +#else +struct has_custom_serialization : std::false_type {}; +#endif // SIMDJSON_SUPPORTS_CONCEPTS + +namespace lsx { +namespace builder { +/** + * A builder for JSON strings representing documents. This is a low-level + * builder that is not meant to be used directly by end-users. Though it + * supports atomic types (Booleans, strings), it does not support composed + * types (arrays and objects). + * + * Ultimately, this class can support kernel-specific optimizations. E.g., + * it may make use of SIMD instructions to escape strings faster. + */ +class string_builder { +public: + simdjson_inline string_builder(size_t initial_capacity = DEFAULT_INITIAL_CAPACITY); + + static constexpr size_t DEFAULT_INITIAL_CAPACITY = 1024; + + /** + * Append number (includes Booleans). Booleans are mapped to the strings + * false and true. Numbers are converted to strings abiding by the JSON standard. + * Floating-point numbers are converted to the shortest string that 'correctly' + * represents the number. + */ + template::value>::type> + simdjson_inline void append(number_type v) noexcept; + + /** + * Append character c. + */ + simdjson_inline void append(char c) noexcept; + + /** + * Append the string 'null'. + */ + simdjson_inline void append_null() noexcept; + + /** + * Clear the content. + */ + simdjson_inline void clear() noexcept; + + /** + * Append the std::string_view, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append(std::string_view input) noexcept; + + /** + * Append the std::string_view surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(std::string_view input) noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + template + simdjson_inline void escape_and_append_with_quotes() noexcept; +#endif + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(char input) noexcept; + + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(const char* input) noexcept; + + /** + * Append the C string directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *c) noexcept; + + /** + * Append "{" to the buffer. + */ + simdjson_inline void start_object() noexcept; + + /** + * Append "}" to the buffer. + */ + simdjson_inline void end_object() noexcept; + + /** + * Append "[" to the buffer. + */ + simdjson_inline void start_array() noexcept; + + /** + * Append "]" to the buffer. + */ + simdjson_inline void end_array() noexcept; + + /** + * Append "," to the buffer. + */ + simdjson_inline void append_comma() noexcept; + + /** + * Append ":" to the buffer. + */ + simdjson_inline void append_colon() noexcept; + + /** + * Append a key-value pair to the buffer. + * The key is escaped and surrounded by double quotes. + * The value is escaped if it is a string. + */ + template + simdjson_inline void append_key_value(key_type key, value_type value) noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + template + simdjson_inline void append_key_value(value_type value) noexcept; + + // Support for optional types (std::optional, etc.) + template + requires(!require_custom_serialization) + simdjson_inline void append(const T &opt); + + template + requires(require_custom_serialization) + simdjson_inline void append(T &&val); + + // Support for string-like types + template + requires(std::is_convertible::value || + std::is_same::value ) + simdjson_inline void append(const T &value); +#endif +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS + // Support for range-based appending (std::ranges::view, etc.) + template +requires (!std::is_convertible::value && !require_custom_serialization) + simdjson_inline void append(const R &range) noexcept; +#endif + /** + * Append the std::string_view directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(std::string_view input) noexcept; + + /** + * Append len characters from str. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *str, size_t len) noexcept; +#if SIMDJSON_EXCEPTIONS + /** + * Creates an std::string from the written JSON buffer. + * Throws if memory allocation failed + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string() const noexcept(false); + + /** + * Creates an std::string_view from the written JSON buffer. + * Throws if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string_view() const noexcept(false) simdjson_lifetime_bound; +#endif + + /** + * Returns a view on the written JSON buffer. Returns an error + * if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result view() const noexcept; + + /** + * Appends the null character to the buffer and returns + * a pointer to the beginning of the written JSON buffer. + * Returns an error if memory allocation failed. + * The result is null-terminated. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result c_str() noexcept; + + /** + * Return true if the content is valid UTF-8. + */ + simdjson_inline bool validate_unicode() const noexcept; + + /** + * Returns the current size of the written JSON buffer. + * If an error occurred, returns 0. + */ + simdjson_inline size_t size() const noexcept; + +private: + /** + * Returns true if we can write at least upcoming_bytes bytes. + * The underlying buffer is reallocated if needed. It is designed + * to be called before writing to the buffer. It should be fast. + */ + simdjson_inline bool capacity_check(size_t upcoming_bytes); + + /** + * Grow the buffer to at least desired_capacity bytes. + * If the allocation fails, is_valid is set to false. We expect + * that this function would not be repeatedly called. + */ + simdjson_inline void grow_buffer(size_t desired_capacity); + + /** + * We use this helper function to make sure that is_valid is kept consistent. + */ + simdjson_inline void set_valid(bool valid) noexcept; + + std::unique_ptr buffer{}; + size_t position{0}; + size_t capacity{0}; + bool is_valid{true}; +}; + + + +} +} + + +#if !SIMDJSON_STATIC_REFLECTION +// fallback implementation until we have static reflection +template +simdjson_warn_unused simdjson_result to_json(const Z &z, size_t initial_capacity = simdjson::lsx::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + simdjson::lsx::builder::string_builder b(initial_capacity); + b.append(z); + std::string_view s; + auto e = b.view().get(s); + if(e) { return e; } + return std::string(s); +} +template +simdjson_warn_unused simdjson_error to_json(const Z &z, std::string &s, size_t initial_capacity = simdjson::lsx::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + simdjson::lsx::builder::string_builder b(initial_capacity); + b.append(z); + std::string_view sv; + auto e = b.view().get(sv); + if(e) { return e; } + s.assign(sv.data(), sv.size()); + return simdjson::SUCCESS; +} +#endif + +#if SIMDJSON_SUPPORTS_CONCEPTS +#endif // SIMDJSON_SUPPORTS_CONCEPTS + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_STRING_BUILDER_H +/* end file simdjson/generic/ondemand/json_string_builder.h for lsx */ + // All other declarations /* including simdjson/generic/ondemand/array.h for lsx: #include "simdjson/generic/ondemand/array.h" */ /* begin file simdjson/generic/ondemand/array.h for lsx */ @@ -103917,6 +127257,7 @@ struct simdjson_result : public lsx::implementation_simdj /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { @@ -104019,7 +127360,7 @@ class array { * JSONPath queries that trivially convertible to JSON Pointer queries: key * names and array indices. * - * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * https://www.rfc-editor.org/rfc/rfc9535 (RFC 9535) * * @return The value associated with the given JSONPath expression, or: * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails @@ -104029,6 +127370,15 @@ class array { */ inline simdjson_result at_path(std::string_view json_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * Supports wildcard patterns like "[*]" to match all array elements. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern + */ + inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; + /** * Consumes the array and returns a string_view instance corresponding to the * array as represented in JSON. It points inside the original document. @@ -104043,11 +127393,42 @@ class array { * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) noexcept; + +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this array as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON array is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_warn_unused simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the array, no matter where you are right now. */ - simdjson_inline error_code consume() noexcept; + simdjson_warn_unused simdjson_inline error_code consume() noexcept; /** * Begin array iteration. @@ -104120,8 +127501,30 @@ struct simdjson_result : public lsx::implementation_simdjs simdjson_inline simdjson_result at(size_t index) noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_warn_unused simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -104166,7 +127569,8 @@ class array_iterator { * * Part of the std::iterator interface. */ - simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result + operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. /** * Check if we are at the end of the JSON. * @@ -104190,6 +127594,11 @@ class array_iterator { */ simdjson_inline array_iterator &operator++() noexcept; + /** + * Check if the array is at the end. + */ + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; + private: value_iterator iter{}; @@ -104208,7 +127617,6 @@ namespace simdjson { template<> struct simdjson_result : public lsx::implementation_simdjson_result_base { -public: simdjson_inline simdjson_result(lsx::ondemand::array_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -104221,6 +127629,8 @@ struct simdjson_result : public lsx::implementati simdjson_inline bool operator==(const simdjson_result &) const noexcept; simdjson_inline bool operator!=(const simdjson_result &) const noexcept; simdjson_inline simdjson_result &operator++() noexcept; + + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; }; } // namespace simdjson @@ -104237,6 +127647,7 @@ struct simdjson_result : public lsx::implementati /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ @@ -104348,7 +127759,7 @@ class document { * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. */ template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; /** * Cast this JSON value to a string. * @@ -104414,7 +127825,7 @@ class document { */ template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -104437,7 +127848,7 @@ class document { */ template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -104455,18 +127866,18 @@ class document { * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. - * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns INCORRECT_TYPE If the JSON value is of the given type. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template - simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION + simdjson_warn_unused simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { @@ -104478,7 +127889,7 @@ class document { static_cast(out); // to get rid of unused errors return UNINITIALIZED; } -#else // SIMDJSON_SUPPORTS_DESERIALIZATION +#else // SIMDJSON_SUPPORTS_CONCEPTS // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -104488,7 +127899,7 @@ class document { " You may also add support for custom types, see our documentation."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS } /** @overload template error_code get(T &out) & noexcept */ @@ -104553,7 +127964,7 @@ class document { * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a raw_json_string. * @@ -104562,7 +127973,7 @@ class document { * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a bool. * @@ -104633,12 +128044,14 @@ class document { simdjson_inline simdjson_result end() & noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -104677,7 +128090,8 @@ class document { * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the default because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -104712,11 +128126,27 @@ class document { * E.g., you must still call "is_null()" to check that a value is null even if * "type()" returns json_type::null. * + * The answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. + * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). * - * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + * Prior to simdjson 4.0, this function would return an error given a bad token. + * Starting with simdjson 4.0, it will return simdjson::ondemand::json_type::unknown. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. */ simdjson_inline simdjson_result type() noexcept; @@ -104916,7 +128346,7 @@ class document { * JSONPath queries that trivially convertible to JSON Pointer queries: key * names and array indices. * - * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * https://www.rfc-editor.org/rfc/rfc9535 (RFC 9535) * * Key values are matched exactly, without unescaping or Unicode normalization. * We do a byte-by-byte comparison. E.g. @@ -104934,17 +128364,64 @@ class document { */ simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * + * Supports wildcard patterns like "$.array[*]" or "$.object.*" to match multiple elements. + * + * This method materializes all matching values into a vector. + * The document will be consumed after this call. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern, or: + * - INVALID_JSON_POINTER if the JSONPath cannot be parsed + * - NO_SUCH_FIELD if a field does not exist + * - INDEX_OUT_OF_BOUNDS if an array index is out of bounds + * - INCORRECT_TYPE if path traversal encounters wrong type + */ + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; + /** * Consumes the document and returns a string_view instance corresponding to the * document as represented in JSON. It points inside the original byte array containing * the JSON document. */ simdjson_inline simdjson_result raw_json() noexcept; + +#if SIMDJSON_STATIC_REFLECTION + /** + * Extract only specific fields from the JSON object into a struct. + * + * This allows selective deserialization of only the fields you need, + * potentially improving performance by skipping unwanted fields. + * + * Example: + * ```cpp + * struct Car { + * std::string make; + * std::string model; + * int year; + * double price; + * }; + * + * Car car; + * doc.extract_into<"make", "model">(car); + * // Only 'make' and 'model' fields are extracted from JSON + * ``` + * + * @tparam FieldNames Compile-time string literals specifying which fields to extract + * @param out The output struct to populate with selected fields + * @returns SUCCESS on success, or an error code if a required field is missing or has wrong type + */ + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION protected: /** * Consumes the document. */ - simdjson_inline error_code consume() noexcept; + simdjson_warn_unused simdjson_inline error_code consume() noexcept; simdjson_inline document(ondemand::json_iterator &&iter) noexcept; simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; @@ -104997,7 +128474,7 @@ class document_reference { simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -105006,7 +128483,7 @@ class document_reference { simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -105019,7 +128496,7 @@ class document_reference { } template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -105041,14 +128518,14 @@ class document_reference { * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template - simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION + simdjson_warn_unused simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { @@ -105060,7 +128537,7 @@ class document_reference { static_cast(out); // to get rid of unused errors return UNINITIALIZED; } -#else // SIMDJSON_SUPPORTS_DESERIALIZATION +#else // SIMDJSON_SUPPORTS_CONCEPTS // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -105070,12 +128547,17 @@ class document_reference { " You may also add support for custom types, see our documentation."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS } /** @overload template error_code get(T &out) & noexcept */ template simdjson_inline error_code get(T &out) && noexcept; simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_STATIC_REFLECTION + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION simdjson_inline operator document&() const noexcept; #if SIMDJSON_EXCEPTIONS template @@ -105116,6 +128598,7 @@ class document_reference { simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; private: document *doc{nullptr}; @@ -105144,7 +128627,7 @@ struct simdjson_result : public lsx::implementation_sim simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -105157,6 +128640,9 @@ struct simdjson_result : public lsx::implementation_sim template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS + + using lsx::implementation_simdjson_result_base::operator*; + using lsx::implementation_simdjson_result_base::operator->; template ::value == false>::type> explicit simdjson_inline operator T() noexcept(false); simdjson_inline operator lsx::ondemand::array() & noexcept(false); @@ -105196,6 +128682,12 @@ struct simdjson_result : public lsx::implementation_sim simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; +#if SIMDJSON_STATIC_REFLECTION + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION }; @@ -105222,7 +128714,7 @@ struct simdjson_result : public lsx::implemen simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -105273,6 +128765,12 @@ struct simdjson_result : public lsx::implemen simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; +#if SIMDJSON_STATIC_REFLECTION + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION }; @@ -105365,7 +128863,7 @@ class document_stream { /** * Construct an uninitialized document_stream. * - * ```c++ + * ```cpp * document_stream docs; * auto error = parser.iterate_many(json).get(docs); * ``` @@ -105415,6 +128913,7 @@ class document_stream { * Default constructor. */ simdjson_inline iterator() noexcept; + simdjson_inline iterator(const iterator &other) noexcept = default; /** * Get the current document (or error). */ @@ -105428,6 +128927,7 @@ class document_stream { * @param other the end iterator to compare to. */ simdjson_inline bool operator!=(const iterator &other) const noexcept; + simdjson_inline bool operator==(const iterator &other) const noexcept; /** * @private * @@ -105471,6 +128971,11 @@ class document_stream { */ inline error_code error() const noexcept; + /** + * Returns whether the iterator is at the end. + */ + inline bool at_end() const noexcept; + private: simdjson_inline iterator(document_stream *s, bool finished) noexcept; /** The document_stream we're iterating through. */ @@ -105482,6 +128987,7 @@ class document_stream { friend class document_stream; friend class json_iterator; }; + using iterator = document_stream::iterator; /** * Start iterating the documents in the stream. @@ -105745,6 +129251,10 @@ struct simdjson_result : public lsx::implementation_simdjs /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION && SIMDJSON_SUPPORTS_CONCEPTS */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_string_builder.h" // for constevalutil::fixed_string */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { @@ -105766,12 +129276,14 @@ class object { simdjson_inline simdjson_result begin() noexcept; simdjson_inline simdjson_result end() noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -105814,7 +129326,8 @@ class object { * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the default because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -105897,6 +129410,15 @@ class object { */ inline simdjson_result at_path(std::string_view json_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * Supports wildcard patterns like ".*" to match all object fields. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern + */ + inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; + /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you @@ -105942,11 +129464,71 @@ class object { */ simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this object as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON object is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_warn_unused simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } + +#if SIMDJSON_STATIC_REFLECTION + /** + * Extract only specific fields from the JSON object into a struct. + * + * This allows selective deserialization of only the fields you need, + * potentially improving performance by skipping unwanted fields. + * + * Example: + * ```cpp + * struct Car { + * std::string make; + * std::string model; + * int year; + * double price; + * }; + * + * Car car; + * object.extract_into<"make", "model">(car); + * // Only 'make' and 'model' fields are extracted from JSON + * ``` + * + * @tparam FieldNames Compile-time string literals specifying which fields to extract + * @param out The output struct to populate with selected fields + * @returns SUCCESS on success, or an error code if a required field is missing or has wrong type + */ + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the object, no matter where you are right now. */ - simdjson_inline error_code consume() noexcept; + simdjson_warn_unused simdjson_inline error_code consume() noexcept; static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; @@ -105985,12 +129567,43 @@ struct simdjson_result : public lsx::implementation_simdj simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; - + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; inline simdjson_result reset() noexcept; inline simdjson_result is_empty() noexcept; inline simdjson_result count_fields() & noexcept; inline simdjson_result raw_json() noexcept; + #if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_warn_unused simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#if SIMDJSON_STATIC_REFLECTION + // TODO: move this code into object-inl.h + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) noexcept { + if (error()) { return error(); } + return first.extract_into(out); + } +#endif // SIMDJSON_STATIC_REFLECTION +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -106119,6 +129732,20 @@ inline simdjson_result to_json_string(simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); + +#if SIMDJSON_STATIC_REFLECTION +/** + * Create a JSON string from any user-defined type using static reflection. + * Only available when SIMDJSON_STATIC_REFLECTION is enabled. + */ +template + requires(!std::same_as && + !std::same_as && + !std::same_as && + !std::same_as) +inline std::string to_json_string(const T& obj); +#endif + } // namespace simdjson /** @@ -106190,28 +129817,30 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result #include +#if SIMDJSON_STATIC_REFLECTION +#include +// #include // for std::define_static_string - header not available yet +#endif namespace simdjson { -template -constexpr bool require_custom_serialization = false; ////////////////////////////// // Number deserialization ////////////////////////////// template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { using limits = std::numeric_limits; @@ -106225,7 +129854,6 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { } template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { double x; SIMDJSON_TRY(val.get_double().get(x)); @@ -106234,7 +129862,6 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { } template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { using limits = std::numeric_limits; @@ -106247,8 +129874,23 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { return SUCCESS; } +////////////////////////////// +// String deserialization +////////////////////////////// + +// just a character! +error_code tag_invoke(deserialize_tag, auto &val, char &out) noexcept { + std::string_view x; + SIMDJSON_TRY(val.get_string().get(x)); + if(x.size() != 1) { + return INCORRECT_TYPE; + } + out = x[0]; + return SUCCESS; +} + +// any string-like type (can be constructed from std::string_view) template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { std::string_view str; SIMDJSON_TRY(val.get_string().get(str)); @@ -106265,7 +129907,6 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothr * doc.get>(). */ template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { using value_type = typename std::remove_cvref_t::value_type; static_assert( @@ -106274,9 +129915,13 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { static_assert( std::is_default_constructible_v, "The specified type inside the container must default constructible."); - lsx::ondemand::array arr; - SIMDJSON_TRY(val.get_array().get(arr)); + if constexpr (std::is_same_v, lsx::ondemand::array>) { + arr = val; + } else { + SIMDJSON_TRY(val.get_array().get(arr)); + } + for (auto v : arr) { if constexpr (concepts::returns_reference) { if (auto const err = v.get().get(concepts::emplace_one(out)); @@ -106307,7 +129952,6 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { * string-keyed types. */ template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { using value_type = typename std::remove_cvref_t::mapped_type; static_assert( @@ -106333,7 +129977,45 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { return SUCCESS; } +template +error_code tag_invoke(deserialize_tag, lsx::ondemand::object &obj, T &out) noexcept { + using value_type = typename std::remove_cvref_t::mapped_type; + + out.clear(); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + + lsx::ondemand::value value_obj; + SIMDJSON_TRY(field.value().get(value_obj)); + + value_type this_value; + SIMDJSON_TRY(value_obj.get(this_value)); + out.emplace(typename T::key_type(key), std::move(this_value)); + } + return SUCCESS; +} + +template +error_code tag_invoke(deserialize_tag, lsx::ondemand::value &val, T &out) noexcept { + lsx::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + +template +error_code tag_invoke(deserialize_tag, lsx::ondemand::document &doc, T &out) noexcept { + lsx::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} +template +error_code tag_invoke(deserialize_tag, lsx::ondemand::document_reference &doc, T &out) noexcept { + lsx::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} /** @@ -106351,7 +130033,6 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { * @return status of the conversion */ template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::element_type, ValT>) { using element_type = typename std::remove_cvref_t::element_type; @@ -106376,17 +130057,17 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser /** * This CPO (Customization Point Object) will help deserialize into optional types. */ -template - requires(!require_custom_serialization) -error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::value_type, ValT>) { +template +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept(nothrow_deserializable::value_type, decltype(val)>) { using value_type = typename std::remove_cvref_t::value_type; - static_assert( - deserializable, - "The specified type inside the unique_ptr must itself be deserializable"); - static_assert( - std::is_default_constructible_v, - "The specified type inside the unique_ptr must default constructible."); + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullopt + return SUCCESS; + } if (!out) { out.emplace(); @@ -106395,10 +130076,329 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser return SUCCESS; } + +#if SIMDJSON_STATIC_REFLECTION + + +template +constexpr bool user_defined_type = (std::is_class_v +&& !std::is_same_v && !std::is_same_v && !concepts::optional_type && +!concepts::appendable_containers); + + +template + requires(user_defined_type && std::is_class_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { + lsx::ondemand::object obj; + if constexpr (std::is_same_v, lsx::ondemand::object>) { + obj = val; + } else { + SIMDJSON_TRY(val.get_object().get(obj)); + } + template for (constexpr auto mem : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + if constexpr (concepts::optional_type) { + // for optional members, it's ok if the key is missing + auto error = obj[key].get(out.[:mem:]); + if (error && error != NO_SUCH_FIELD) { + if(error == NO_SUCH_FIELD) { + out.[:mem:].reset(); + continue; + } + return error; + } + } else { + // for non-optional members, the key must be present + SIMDJSON_TRY(obj[key].get(out.[:mem:])); + } + } + }; + return simdjson::SUCCESS; +} + +// Support for enum deserialization - deserialize from string representation using expand approach from P2996R12 +template + requires(std::is_enum_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { +#if SIMDJSON_STATIC_REFLECTION + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + if (str == std::meta::identifier_of(enum_val)) { + out = [:enum_val:]; + return SUCCESS; + } + }; + + return INCORRECT_TYPE; +#else + // Fallback: deserialize as integer if reflection not available + std::underlying_type_t int_val; + SIMDJSON_TRY(val.get(int_val)); + out = static_cast(int_val); + return SUCCESS; +#endif +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::unique_ptr &out) noexcept { + if (!out) { + out = std::make_unique(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::shared_ptr &out) noexcept { + if (!out) { + out = std::make_shared(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +#endif // SIMDJSON_STATIC_REFLECTION + +//////////////////////////////////////// +// Unique pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Shared pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Explicit optional specializations +//////////////////////////////////////// + +//////////////////////////////////////// +// Explicit smart pointer specializations for string and int types +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_shared(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + int64_t temp; + SIMDJSON_TRY(val.get_int64().get(temp)); + *out = static_cast(temp); + return SUCCESS; +} + } // namespace simdjson #endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS /* end file simdjson/generic/ondemand/std_deserialize.h for lsx */ // Inline definitions @@ -106491,7 +130491,7 @@ simdjson_inline simdjson_result array::begin() noexcept { simdjson_inline simdjson_result array::end() noexcept { return array_iterator(iter); } -simdjson_inline error_code array::consume() noexcept { +simdjson_warn_unused simdjson_warn_unused simdjson_inline error_code array::consume() noexcept { auto error = iter.json_iter().skip_child(iter.depth()-1); if(error) { iter.abandon(); } return error; @@ -106576,6 +130576,67 @@ inline simdjson_result array::at_path(std::string_view json_path) noexcep return at_pointer(json_pointer); } +inline simdjson_result> array::at_path_with_wildcard(std::string_view json_path) noexcept { + std::vector result; + + auto result_pair = get_next_key_and_json_path(json_path); + std::string_view key = result_pair.first; + std::string_view remaining_path = result_pair.second; + // Wildcard case + if(key=="*"){ + for(auto element: *this){ + + if(element.error()){ + return element.error(); + } + + if(remaining_path.empty()){ + // Use value_unsafe() because we've already checked for errors above. + // The 'element' is a simdjson_result wrapper, and we need to extract + // the underlying value. value_unsafe() is safe here because error() returned false. + result.push_back(std::move(element).value_unsafe()); + + }else{ + auto nested_result = element.at_path_with_wildcard(remaining_path); + + if(nested_result.error()){ + return nested_result.error(); + } + // Same logic as above. + std::vector nested_matches = std::move(nested_result).value_unsafe(); + + result.insert(result.end(), + std::make_move_iterator(nested_matches.begin()), + std::make_move_iterator(nested_matches.end())); + } + } + return result; + }else{ + // Specific index case in which we access the element at the given index + size_t idx=0; + + for(char c:key){ + if(c < '0' || c > '9'){ + return INVALID_JSON_POINTER; + } + idx = idx*10 + (c - '0'); + } + + auto element = at(idx); + + if(element.error()){ + return element.error(); + } + + if(remaining_path.empty()){ + result.push_back(std::move(element).value_unsafe()); + return result; + }else{ + return element.at_path_with_wildcard(remaining_path); + } + } +} + simdjson_inline simdjson_result array::at(size_t index) noexcept { size_t i = 0; for (auto value : *this) { @@ -106634,6 +130695,10 @@ simdjson_inline simdjson_result simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path_with_wildcard(json_path); +} simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { if (error()) { return error(); } return first.raw_json(); @@ -106682,6 +130747,9 @@ simdjson_inline array_iterator &array_iterator::operator++() noexcept { return *this; } +simdjson_inline bool array_iterator::at_end() const noexcept { + return iter.at_end(); +} } // namespace ondemand } // namespace lsx } // namespace simdjson @@ -106718,7 +130786,9 @@ simdjson_inline simdjson_result &simdjson_result< ++(first); return *this; } - +simdjson_inline bool simdjson_result::at_end() const noexcept { + return !first.iter.is_valid() || first.at_end(); +} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H @@ -106775,7 +130845,7 @@ simdjson_inline simdjson_result value::get_string(bool allow_r return iter.get_string(allow_replacement); } template -simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_warn_unused simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { return iter.get_string(receiver, allow_replacement); } simdjson_inline simdjson_result value::get_wobbly_string() noexcept { @@ -106817,15 +130887,15 @@ template<> simdjson_inline simdjson_result value::get() noexcept { retu template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } -template<> simdjson_inline error_code value::get(array& out) noexcept { return get_array().get(out); } -template<> simdjson_inline error_code value::get(object& out) noexcept { return get_object().get(out); } -template<> simdjson_inline error_code value::get(raw_json_string& out) noexcept { return get_raw_json_string().get(out); } -template<> simdjson_inline error_code value::get(std::string_view& out) noexcept { return get_string(false).get(out); } -template<> simdjson_inline error_code value::get(number& out) noexcept { return get_number().get(out); } -template<> simdjson_inline error_code value::get(double& out) noexcept { return get_double().get(out); } -template<> simdjson_inline error_code value::get(uint64_t& out) noexcept { return get_uint64().get(out); } -template<> simdjson_inline error_code value::get(int64_t& out) noexcept { return get_int64().get(out); } -template<> simdjson_inline error_code value::get(bool& out) noexcept { return get_bool().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(array& out) noexcept { return get_array().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(object& out) noexcept { return get_object().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(raw_json_string& out) noexcept { return get_raw_json_string().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(std::string_view& out) noexcept { return get_string(false).get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(number& out) noexcept { return get_number().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(double& out) noexcept { return get_double().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(uint64_t& out) noexcept { return get_uint64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(int64_t& out) noexcept { return get_int64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(bool& out) noexcept { return get_bool().get(out); } #if SIMDJSON_EXCEPTIONS template @@ -107023,6 +131093,19 @@ simdjson_inline simdjson_result value::at_path(std::string_view json_path } } +inline simdjson_result> value::at_path_with_wildcard(std::string_view json_path) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path_with_wildcard(json_path); + case json_type::object: + return (*this).get_object().at_path_with_wildcard(json_path); + default: + return INVALID_JSON_POINTER; + } +} + } // namespace ondemand } // namespace lsx } // namespace simdjson @@ -107273,6 +131356,14 @@ simdjson_inline simdjson_result simdjson_result> simdjson_result::at_path_with_wildcard( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path_with_wildcard(json_path); +} + } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H @@ -107422,7 +131513,7 @@ simdjson_inline simdjson_result document::get_string(bool allo return get_root_value_iterator().get_root_string(true, allow_replacement); } template -simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_warn_unused simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept { return get_root_value_iterator().get_root_string(receiver, true, allow_replacement); } simdjson_inline simdjson_result document::get_wobbly_string() noexcept { @@ -107448,15 +131539,15 @@ template<> simdjson_inline simdjson_result document::get() & noexcept { template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } -template<> simdjson_inline error_code document::get(array& out) & noexcept { return get_array().get(out); } -template<> simdjson_inline error_code document::get(object& out) & noexcept { return get_object().get(out); } -template<> simdjson_inline error_code document::get(raw_json_string& out) & noexcept { return get_raw_json_string().get(out); } -template<> simdjson_inline error_code document::get(std::string_view& out) & noexcept { return get_string(false).get(out); } -template<> simdjson_inline error_code document::get(double& out) & noexcept { return get_double().get(out); } -template<> simdjson_inline error_code document::get(uint64_t& out) & noexcept { return get_uint64().get(out); } -template<> simdjson_inline error_code document::get(int64_t& out) & noexcept { return get_int64().get(out); } -template<> simdjson_inline error_code document::get(bool& out) & noexcept { return get_bool().get(out); } -template<> simdjson_inline error_code document::get(value& out) & noexcept { return get_value().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(array& out) & noexcept { return get_array().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(object& out) & noexcept { return get_object().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(raw_json_string& out) & noexcept { return get_raw_json_string().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(std::string_view& out) & noexcept { return get_string(false).get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(double& out) & noexcept { return get_double().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(uint64_t& out) & noexcept { return get_uint64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(int64_t& out) & noexcept { return get_int64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(bool& out) & noexcept { return get_bool().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(value& out) & noexcept { return get_value().get(out); } template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } @@ -107476,8 +131567,8 @@ simdjson_inline document::operator object() & noexcept(false) { return get_objec simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline document::operator double() noexcept(false) { return get_double(); } -simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } -simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator std::string_view() noexcept(false) simdjson_lifetime_bound { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) simdjson_lifetime_bound { return get_raw_json_string(); } simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } simdjson_inline document::operator value() noexcept(false) { return get_value(); } @@ -107519,1141 +131610,1961 @@ simdjson_inline simdjson_result document::find_field_unordered(std::strin simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { return start_or_resume_object().find_field_unordered(key); } -simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { - return start_or_resume_object()[key]; +simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { + return start_or_resume_object()[key]; +} + +simdjson_warn_unused simdjson_inline error_code document::consume() noexcept { + bool scalar = false; + auto error = is_scalar().get(scalar); + if(error) { return error; } + if(scalar) { + iter.return_current_and_advance(); + return SUCCESS; + } + error = iter.skip_child(0); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result document::raw_json() noexcept { + auto _iter = get_root_value_iterator(); + const uint8_t * starting_point{_iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter.unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result document::type() noexcept { + return get_root_value_iterator().type(); +} + +simdjson_inline simdjson_result document::is_scalar() noexcept { + // For more speed, we could do: + // return iter.is_single_token(); + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline simdjson_result document::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} + +simdjson_inline bool document::is_negative() noexcept { + return get_root_value_iterator().is_root_negative(); +} + +simdjson_inline simdjson_result document::is_integer() noexcept { + return get_root_value_iterator().is_root_integer(true); +} + +simdjson_inline simdjson_result document::get_number_type() noexcept { + return get_root_value_iterator().get_root_number_type(true); +} + +simdjson_inline simdjson_result document::get_number() noexcept { + return get_root_value_iterator().get_root_number(true); +} + + +simdjson_inline simdjson_result document::raw_json_token() noexcept { + auto _iter = get_root_value_iterator(); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_root_length()); +} + +simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_pointer.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result document::at_path(std::string_view json_path) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_path.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result> document::at_path_with_wildcard(std::string_view json_path) noexcept { + rewind(); // Rewind the document each time at_path_with_wildcard is called + if (json_path.empty()) { + return INVALID_JSON_POINTER; + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path_with_wildcard(json_path); + case json_type::object: + return (*this).get_object().at_path_with_wildcard(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code document::extract_into(T& out) & noexcept { + // Helper to check if a field name matches any of the requested fields + auto should_extract = [](std::string_view field_name) constexpr -> bool { + return ((FieldNames.view() == field_name) || ...); + }; + + // Iterate through all members of T using reflection + template for (constexpr auto mem : std::define_static_array( + std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + + // Only extract this field if it's in our list of requested fields + if constexpr (should_extract(key)) { + // Try to find and extract the field + if constexpr (concepts::optional_type) { + // For optional fields, it's ok if they're missing + auto field_result = find_field_unordered(key); + if (!field_result.error()) { + auto error = field_result.get(out.[:mem:]); + if (error && error != NO_SUCH_FIELD) { + return error; + } + } else if (field_result.error() != NO_SUCH_FIELD) { + return field_result.error(); + } else { + out.[:mem:].reset(); + } + } else { + // For required fields (in the requested list), fail if missing + SIMDJSON_TRY((*this)[key].get(out.[:mem:])); + } + } + } + }; + + return SUCCESS; +} + +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + lsx::ondemand::document &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base( + error + ) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first); +} +template<> simdjson_warn_unused simdjson_inline error_code simdjson_result::get(lsx::ondemand::document &out) & noexcept = delete; +template<> simdjson_warn_unused simdjson_inline error_code simdjson_result::get(lsx::ondemand::document &out) && noexcept { + if (error()) { return error(); } + out = std::forward(first); + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} + +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} + +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} + +simdjson_inline bool simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} + +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} + +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} + +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} + + +#if SIMDJSON_EXCEPTIONS +template ::value == false>::type> +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lsx::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lsx::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lsx::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } -simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { - return start_or_resume_object()[key]; +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } - -simdjson_inline error_code document::consume() noexcept { - bool scalar = false; - auto error = is_scalar().get(scalar); - if(error) { return error; } - if(scalar) { - iter.return_current_and_advance(); - return SUCCESS; - } - error = iter.skip_child(0); - if(error) { iter.abandon(); } - return error; +simdjson_inline simdjson_result::operator lsx::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } +#endif -simdjson_inline simdjson_result document::raw_json() noexcept { - auto _iter = get_root_value_iterator(); - const uint8_t * starting_point{_iter.peek_start()}; - auto error = consume(); - if(error) { return error; } - // After 'consume()', we could be left pointing just beyond the document, but that - // is ok because we are not going to dereference the final pointer position, we just - // use it to compute the length in bytes. - const uint8_t * final_point{iter.unsafe_pointer()}; - return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); } -simdjson_inline simdjson_result document::type() noexcept { - return get_root_value_iterator().type(); +simdjson_inline bool simdjson_result::at_end() const noexcept { + if (error()) { return error(); } + return first.at_end(); } -simdjson_inline simdjson_result document::is_scalar() noexcept { - // For more speed, we could do: - // return iter.is_single_token(); - json_type this_type; - auto error = type().get(this_type); - if(error) { return error; } - return ! ((this_type == json_type::array) || (this_type == json_type::object)); + +simdjson_inline int32_t simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); } -simdjson_inline simdjson_result document::is_string() noexcept { - json_type this_type; - auto error = type().get(this_type); - if(error) { return error; } - return (this_type == json_type::string); +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); } -simdjson_inline bool document::is_negative() noexcept { - return get_root_value_iterator().is_root_negative(); +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); } -simdjson_inline simdjson_result document::is_integer() noexcept { - return get_root_value_iterator().is_root_integer(true); +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); } -simdjson_inline simdjson_result document::get_number_type() noexcept { - return get_root_value_iterator().get_root_number_type(true); +simdjson_inline simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path_with_wildcard(json_path); } -simdjson_inline simdjson_result document::get_number() noexcept { - return get_root_value_iterator().get_root_number(true); +#if SIMDJSON_STATIC_REFLECTION +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code simdjson_result::extract_into(T& out) & noexcept { + if (error()) { return error(); } + return first.extract_into(out); } +#endif // SIMDJSON_STATIC_REFLECTION +} // namespace simdjson -simdjson_inline simdjson_result document::raw_json_token() noexcept { - auto _iter = get_root_value_iterator(); - return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_root_length()); -} -simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { - rewind(); // Rewind the document each time at_pointer is called - if (json_pointer.empty()) { - return this->get_value(); - } - json_type t; - SIMDJSON_TRY(type().get(t)); - switch (t) - { - case json_type::array: - return (*this).get_array().at_pointer(json_pointer); - case json_type::object: - return (*this).get_object().at_pointer(json_pointer); - default: - return INVALID_JSON_POINTER; - } -} +namespace simdjson { +namespace lsx { +namespace ondemand { -simdjson_inline simdjson_result document::at_path(std::string_view json_path) noexcept { - rewind(); // Rewind the document each time at_pointer is called - if (json_path.empty()) { - return this->get_value(); - } - json_type t; - SIMDJSON_TRY(type().get(t)); - switch (t) { - case json_type::array: - return (*this).get_array().at_path(json_path); - case json_type::object: - return (*this).get_object().at_path(json_path); - default: - return INVALID_JSON_POINTER; - } +simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} +simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} +simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } +simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } +simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } +/** + * The document_reference instances are used primarily/solely for streams of JSON + * documents. + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. + * + * However, for streams of JSON documents, we want to be able to start from + * "321" "321" "321" + * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() + * successfully each time. + * + * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: + * this indicates that we allow trailing content. + */ +simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } +simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } +simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } +template +simdjson_warn_unused simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } +simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } +simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } +simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } +simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } +simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_value(); } +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline document_reference::operator T() noexcept(false) { return get(); } +simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } +simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } +simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } +simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } +simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } +#endif +simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } +simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } +simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } +simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } +simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } +simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } +simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_inline simdjson_result document_reference::is_string() noexcept { return doc->is_string(); } +simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } +simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } +simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } +simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } +simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } +simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } +simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } +simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_inline simdjson_result document_reference::at_path(std::string_view json_path) noexcept { return doc->at_path(json_path); } +simdjson_inline simdjson_result> document_reference::at_path_with_wildcard(std::string_view json_path) noexcept { return doc->at_path_with_wildcard(json_path); } +simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} +simdjson_inline document_reference::operator document&() const noexcept { return *doc; } +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code document_reference::extract_into(T& out) & noexcept { + return doc->extract_into(out); } - +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION } // namespace ondemand } // namespace lsx } // namespace simdjson + + namespace simdjson { +simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::document_reference value, error_code error) + noexcept : implementation_simdjson_result_base(std::forward(value), error) {} -simdjson_inline simdjson_result::simdjson_result( - lsx::ondemand::document &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ -} -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base( - error - ) -{ -} -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } -simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { if (error()) { return error(); } return first.at(index); } -simdjson_inline error_code simdjson_result::rewind() noexcept { +simdjson_inline error_code simdjson_result::rewind() noexcept { if (error()) { return error(); } first.rewind(); return SUCCESS; } -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_inline simdjson_result simdjson_result::end() & noexcept { +simdjson_inline simdjson_result simdjson_result::end() & noexcept { return {}; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { if (error()) { return error(); } return first.get_array(); } -simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { if (error()) { return error(); } return first.get_object(); } -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_warn_unused simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } -simdjson_inline simdjson_result simdjson_result::get_value() noexcept { +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { if (error()) { return error(); } return first.get_value(); } -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +template <> +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(lsx::ondemand::document_reference &out) & noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} +template <> +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(lsx::ondemand::document_reference &out) && noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline simdjson_result::operator T() noexcept(false) { + static_assert(std::is_same::value == false, "You should not call get when T is a document"); + static_assert(std::is_same::value == false, "You should not call get when T is a document"); + if (error()) { throw simdjson_error(error()); } + return first.get(); +} +simdjson_inline simdjson_result::operator lsx::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lsx::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lsx::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lsx::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} +simdjson_inline simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path_with_wildcard(json_path); +} +#if SIMDJSON_STATIC_REFLECTION +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code simdjson_result::extract_into(T& out) & noexcept { + if (error()) { return error(); } + return first.extract_into(out); +} +#endif // SIMDJSON_STATIC_REFLECTION +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H +/* end file simdjson/generic/ondemand/document-inl.h for lsx */ +/* including simdjson/generic/ondemand/document_stream-inl.h for lsx: #include "simdjson/generic/ondemand/document_stream-inl.h" */ +/* begin file simdjson/generic/ondemand/document_stream-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace lsx { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. + std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); +} + +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); +} + +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). + if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); +} + + +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. + can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } +} + +inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); +} + +#endif // SIMDJSON_THREADS_ENABLED + +simdjson_inline document_stream::document_stream( + ondemand::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size, + bool _allow_comma_separated +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? MINIMAL_BATCH_SIZE : _batch_size}, + allow_comma_separated{_allow_comma_separated}, + error{SUCCESS} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change + #endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif +} -template -simdjson_inline simdjson_result simdjson_result::get() & noexcept { - if (error()) { return error(); } - return first.get(); -} -template -simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first).get(); -} -template -simdjson_inline error_code simdjson_result::get(T &out) & noexcept { - if (error()) { return error(); } - return first.get(out); -} -template -simdjson_inline error_code simdjson_result::get(T &out) && noexcept { - if (error()) { return error(); } - return std::forward(first).get(out); +simdjson_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + allow_comma_separated{false}, + error{UNINITIALIZED} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) + #endif +{ } -template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; -template<> simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first); -} -template<> simdjson_inline error_code simdjson_result::get(lsx::ondemand::document &out) & noexcept = delete; -template<> simdjson_inline error_code simdjson_result::get(lsx::ondemand::document &out) && noexcept { - if (error()) { return error(); } - out = std::forward(first); - return SUCCESS; +simdjson_inline document_stream::~document_stream() noexcept +{ + #ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); + #endif } -simdjson_inline simdjson_result simdjson_result::type() noexcept { - if (error()) { return error(); } - return first.type(); +inline size_t document_stream::size_in_bytes() const noexcept { + return len; } -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { - if (error()) { return error(); } - return first.is_scalar(); +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; } -simdjson_inline simdjson_result simdjson_result::is_string() noexcept { - if (error()) { return error(); } - return first.is_string(); +simdjson_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { } -simdjson_inline bool simdjson_result::is_negative() noexcept { - if (error()) { return error(); } - return first.is_negative(); +simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { } -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { - if (error()) { return error(); } - return first.is_integer(); +simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { + return simdjson_result(stream->doc, stream->error); } -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { - if (error()) { return error(); } - return first.get_number_type(); +simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) + // + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. + // + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->error() is guarded against error conditions + // (it will immediately return if stream->error casts to false). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. + return *this; } -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { - if (error()) { return error(); } - return first.get_number(); +simdjson_inline bool document_stream::iterator::at_end() const noexcept { + return finished; } -#if SIMDJSON_EXCEPTIONS -template ::value == false>::type> -simdjson_inline simdjson_result::operator T() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator lsx::ondemand::array() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator lsx::ondemand::object() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator double() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; } -simdjson_inline simdjson_result::operator lsx::ondemand::raw_json_string() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline bool document_stream::iterator::operator==(const document_stream::iterator &other) const noexcept { + return finished == other.finished; } -simdjson_inline simdjson_result::operator bool() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished. + return iterator(this, error == EMPTY); } -simdjson_inline simdjson_result::operator lsx::ondemand::value() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); } -#endif +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->allocate(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } + doc_index = batch_start; + doc = document(json_iterator(&buf[batch_start], parser)); + doc.iter._streaming = true; -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { - if (error()) { return error(); } - return first.current_location(); + #ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread on next batch if needed + error = stage1_thread_parser.allocate(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } + } + #endif // SIMDJSON_THREADS_ENABLED } -simdjson_inline bool simdjson_result::at_end() const noexcept { - if (error()) { return error(); } - return first.at_end(); -} +inline void document_stream::next() noexcept { + // We always enter at once once in an error condition. + if (error) { return; } + next_document(); + if (error) { return; } + auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); + doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; + // Check if at end of structural indexes (i.e. at end of batch) + if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { + error = EMPTY; + // Load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + #ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } + #else + error = run_stage1(*parser, batch_start); + #endif + /** + * Whenever we move to another window, we need to update all pointers to make + * it appear as if the input buffer started at the beginning of the window. + * + * Take this input: + * + * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] + * + * Say you process the following window... + * + * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' + * + * When you do so, the json_iterator has a pointer at the beginning of the memory region + * (pointing at the beginning of '{"z"...'. + * + * When you move to the window that starts at... + * + * '[7, 10, 9] [15, 11, 12, 13] ... + * + * then it is not sufficient to just run stage 1. You also need to re-anchor the + * json_iterator so that it believes we are starting at '[7, 10, 9]...'. + * + * Under the DOM front-end, this gets done automatically because the parser owns + * the pointer the data, and when you call stage1 and then stage2 on the same + * parser, then stage2 will run on the pointer acquired by stage1. + * + * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that + * we used. But json_iterator has no callback when stage1 is called on the parser. + * In fact, I think that the parser is unaware of json_iterator. + * + * + * So we need to re-anchor the json_iterator after each call to stage 1 so that + * all of the pointers are in sync. + */ + doc.iter = json_iterator(&buf[batch_start], parser); + doc.iter._streaming = true; + /** + * End of resync. + */ -simdjson_inline int32_t simdjson_result::current_depth() const noexcept { - if (error()) { return error(); } - return first.current_depth(); + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. + doc_index = batch_start; + } + } } -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { - if (error()) { return error(); } - return first.raw_json_token(); +inline void document_stream::next_document() noexcept { + // Go to next place where depth=0 (document depth) + error = doc.iter.skip_child(0); + if (error) { return; } + // Always set depth=1 at the start of document + doc.iter._depth = 1; + // consume comma if comma separated is allowed + if (allow_comma_separated) { + error_code ignored = doc.iter.consume_character(','); + static_cast(ignored); // ignored on purpose + } + // Resets the string buffer at the beginning, thus invalidating the strings. + doc.iter._string_buf_loc = parser->string_buf.get(); + doc.iter._root = doc.iter.position(); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; } -simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { - if (error()) { return error(); } - return first.at_path(json_path); +inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { + // This code only updates the structural index in the parser, it does not update any json_iterator + // instance. + size_t remaining = len - _batch_start; + if (remaining <= batch_size) { + return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); + } else { + return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); + } } -} // namespace simdjson - +simdjson_inline size_t document_stream::iterator::current_index() const noexcept { + return stream->doc_index; +} -namespace simdjson { -namespace lsx { -namespace ondemand { +simdjson_inline std::string_view document_stream::iterator::source() const noexcept { + auto depth = stream->doc.iter.depth(); + auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); -simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} -simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} -simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } -simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } -simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } -/** - * The document_reference instances are used primarily/solely for streams of JSON - * documents. - * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should - * give an error, so we check for trailing content. - * - * However, for streams of JSON documents, we want to be able to start from - * "321" "321" "321" - * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() - * successfully each time. - * - * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: - * this indicates that we allow trailing content. - */ -simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } -simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } -simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } -simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } -simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } -simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } -simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } -template -simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } -simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } -simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } -simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } -simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } -simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_array(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_object(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_raw_json_string(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_string(false); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_double(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_uint64(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_int64(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_bool(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_value(); } -#if SIMDJSON_EXCEPTIONS -template -simdjson_inline document_reference::operator T() noexcept(false) { return get(); } -simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } -simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } -simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } -simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } -simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } -simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } -simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } -simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } -simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } -#endif -simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } -simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } -simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } -simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } -simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } -simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } -simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } -simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } -simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } -simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } -simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } -simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } -simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } -simdjson_inline simdjson_result document_reference::is_string() noexcept { return doc->is_string(); } -simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } -simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } -simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } -simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } -simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } -simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } -simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } -simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } -simdjson_inline simdjson_result document_reference::at_path(std::string_view json_path) noexcept { return doc->at_path(json_path); } -simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} -simdjson_inline document_reference::operator document&() const noexcept { return *doc; } + // If at root, process the first token to determine if scalar value + if (stream->doc.iter.at_root()) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': // Depth=1 already at start of document + break; + case '}': case ']': + depth--; + break; + default: // Scalar value document + // TODO: We could remove trailing whitespaces + // This returns a string spanning from start of value to the beginning of the next document (excluded) + { + auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; + // normally the length would be next_index - current_index() - 1, except for the last document + size_t svlen = next_index - current_index(); + const char *start = reinterpret_cast(stream->buf) + current_index(); + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { + svlen--; + } + return std::string_view(start, svlen); + } + } + cur_struct_index++; + } -} // namespace ondemand -} // namespace lsx -} // namespace simdjson + while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': + depth++; + break; + case '}': case ']': + depth--; + break; + } + if (depth == 0) { break; } + cur_struct_index++; + } + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; +} +inline error_code document_stream::iterator::error() const noexcept { + return stream->error; +} -namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::document_reference value, error_code error) - noexcept : implementation_simdjson_result_base(std::forward(value), error) {} +#ifdef SIMDJSON_THREADS_ENABLED +inline void document_stream::load_from_stage1_thread() noexcept { + worker->finish(); + // Swap to the parser that was loaded up in the thread. Make sure the parser has + // enough memory to swap to, as well. + std::swap(stage1_thread_parser,*parser); + error = stage1_thread_error; + if (error) { return; } -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { - if (error()) { return error(); } - return first.count_elements(); -} -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { - if (error()) { return error(); } - return first.count_fields(); -} -simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { - if (error()) { return error(); } - return first.at(index); -} -simdjson_inline error_code simdjson_result::rewind() noexcept { - if (error()) { return error(); } - first.rewind(); - return SUCCESS; -} -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { - if (error()) { return error(); } - return first.begin(); -} -simdjson_inline simdjson_result simdjson_result::end() & noexcept { - return {}; -} -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); -} -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); -} -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { - if (error()) { return error(); } - return first[key]; -} -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { - if (error()) { return error(); } - return first[key]; -} -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); -} -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); -} -simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { - if (error()) { return error(); } - return first.get_array(); + // If there's anything left, start the stage 1 thread! + if (next_batch_start() < len) { + start_stage1_thread(); + } } -simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { - if (error()) { return error(); } - return first.get_object(); + +inline void document_stream::start_stage1_thread() noexcept { + // we call the thread on a lambda that will update + // this->stage1_thread_error + // there is only one thread that may write to this value + // TODO this is NOT exception-safe. + this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); + + worker->run(this, & this->stage1_thread_parser, _next_batch_start); } -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { - if (error()) { return error(); } - return first.get_uint64(); + +#endif // SIMDJSON_THREADS_ENABLED + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ } -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { - if (error()) { return error(); } - return first.get_uint64_in_string(); +simdjson_inline simdjson_result::simdjson_result( + lsx::ondemand::document_stream &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ } -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { - if (error()) { return error(); } - return first.get_int64(); + } -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { - if (error()) { return error(); } - return first.get_int64_in_string(); + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H +/* end file simdjson/generic/ondemand/document_stream-inl.h for lsx */ +/* including simdjson/generic/ondemand/field-inl.h for lsx: #include "simdjson/generic/ondemand/field-inl.h" */ +/* begin file simdjson/generic/ondemand/field-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace ondemand { + +// clang 6 does not think the default constructor can be noexcept, so we make it explicit +simdjson_inline field::field() noexcept : std::pair() {} + +simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept + : std::pair(key, std::forward(value)) +{ } -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { - if (error()) { return error(); } - return first.get_double(); + +simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { + raw_json_string key; + SIMDJSON_TRY( parent_iter.field_key().get(key) ); + SIMDJSON_TRY( parent_iter.field_value() ); + return field::start(parent_iter, key); } -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { - if (error()) { return error(); } - return first.get_double_in_string(); + +simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { + return field(key, parent_iter.child()); } -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.get_string(allow_replacement); + +simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); + first.consume(); + return answer; } + template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.get_string(receiver, allow_replacement); -} -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { - if (error()) { return error(); } - return first.get_wobbly_string(); +simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { + std::string_view key; + SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); + receiver = key; + return SUCCESS; } -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { - if (error()) { return error(); } - return first.get_raw_json_string(); + +simdjson_inline raw_json_string field::key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return first; } -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { - if (error()) { return error(); } - return first.get_bool(); + + +simdjson_inline std::string_view field::key_raw_json_token() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return std::string_view(reinterpret_cast(first.buf-1), second.iter._json_iter->token.peek(-1) - first.buf + 1); } -simdjson_inline simdjson_result simdjson_result::get_value() noexcept { - if (error()) { return error(); } - return first.get_value(); + +simdjson_inline std::string_view field::escaped_key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + auto end_quote = second.iter._json_iter->token.peek(-1); + while(*end_quote != '"') end_quote--; + return std::string_view(reinterpret_cast(first.buf), end_quote - first.buf); } -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { - if (error()) { return error(); } - return first.is_null(); + +simdjson_inline value &field::value() & noexcept { + return second; } -template -simdjson_inline simdjson_result simdjson_result::get() & noexcept { - if (error()) { return error(); } - return first.get(); + +simdjson_inline value field::value() && noexcept { + return std::forward(*this).second; } -template -simdjson_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first).get(); + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + lsx::ondemand::field &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ } -template -simdjson_inline error_code simdjson_result::get(T &out) & noexcept { - if (error()) { return error(); } - return first.get(out); +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ } -template -simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + +simdjson_inline simdjson_result simdjson_result::key() noexcept { if (error()) { return error(); } - return std::forward(first).get(out); + return first.key(); } -simdjson_inline simdjson_result simdjson_result::type() noexcept { + +simdjson_inline simdjson_result simdjson_result::key_raw_json_token() noexcept { if (error()) { return error(); } - return first.type(); + return first.key_raw_json_token(); } -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + +simdjson_inline simdjson_result simdjson_result::escaped_key() noexcept { if (error()) { return error(); } - return first.is_scalar(); + return first.escaped_key(); } -simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + +simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { if (error()) { return error(); } - return first.is_string(); + return first.unescaped_key(allow_replacement); } -template <> -simdjson_inline error_code simdjson_result::get(lsx::ondemand::document_reference &out) & noexcept { + +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { if (error()) { return error(); } - out = first; - return SUCCESS; + return first.unescaped_key(receiver, allow_replacement); } -template <> -simdjson_inline error_code simdjson_result::get(lsx::ondemand::document_reference &out) && noexcept { + +simdjson_inline simdjson_result simdjson_result::value() noexcept { if (error()) { return error(); } - out = first; - return SUCCESS; + return std::move(first.value()); } -simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { - if (error()) { return error(); } - return first.is_negative(); + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +/* end file simdjson/generic/ondemand/field-inl.h for lsx */ +/* including simdjson/generic/ondemand/json_iterator-inl.h for lsx: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/json_iterator-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace ondemand { + +simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept + : token(std::forward(other.token)), + parser{other.parser}, + _string_buf_loc{other._string_buf_loc}, + error{other.error}, + _depth{other._depth}, + _root{other._root}, + _streaming{other._streaming} +{ + other.parser = nullptr; } -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { - if (error()) { return error(); } - return first.is_integer(); +simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { + token = other.token; + parser = other.parser; + _string_buf_loc = other._string_buf_loc; + error = other.error; + _depth = other._depth; + _root = other._root; + _streaming = other._streaming; + other.parser = nullptr; + return *this; } -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { - if (error()) { return error(); } - return first.get_number_type(); + +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{false} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif } -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { - if (error()) { return error(); } - return first.get_number(); + +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{streaming} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif } -#if SIMDJSON_EXCEPTIONS -template -simdjson_inline simdjson_result::operator T() noexcept(false) { - static_assert(std::is_same::value == false, "You should not call get when T is a document"); - static_assert(std::is_same::value == false, "You should not call get when T is a document"); - if (error()) { throw simdjson_error(error()); } - return first.get(); +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + +inline void json_iterator::rewind() noexcept { + token.set_position( root_position() ); + logger::log_headers(); // We start again + _string_buf_loc = parser->string_buf.get(); + _depth = 1; } -simdjson_inline simdjson_result::operator lsx::ondemand::array() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +inline bool json_iterator::balanced() const noexcept { + token_iterator ti(token); + int32_t count{0}; + ti.set_position( root_position() ); + while(ti.peek() <= peek_last()) { + switch (*ti.return_current_and_advance()) + { + case '[': case '{': + count++; + break; + case ']': case '}': + count--; + break; + default: + break; + } + } + return count == 0; } -simdjson_inline simdjson_result::operator lsx::ondemand::object() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and parent_depth, which is a desired effect. The warning does not show up if the +// skip_child() function is not marked inline). +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { + if (depth() <= parent_depth) { return SUCCESS; } + switch (*return_current_and_advance()) { + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + + // For the first open array/object in a value, we've already incremented depth, so keep it the same + // We never stop at colon, but if we did, it wouldn't affect depth + case '[': case '{': case ':': + logger::log_start_value(*this, "skip"); + break; + // If there is a comma, we have just finished a value in an array/object, and need to get back in + case ',': + logger::log_value(*this, "skip"); + break; + // ] or } means we just finished a value and need to jump out of the array/object + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } +#if SIMDJSON_CHECK_EOF + // If there are no more tokens, the parent is incomplete. + if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + break; + case '"': + if(*peek() == ':') { + // We are at a key!!! + // This might happen if you just started an object and you skip it immediately. + // Performance note: it would be nice to get rid of this check as it is somewhat + // expensive. + // https://github.com/simdjson/simdjson/issues/1742 + logger::log_value(*this, "key"); + return_current_and_advance(); // eat up the ':' + break; // important!!! + } + simdjson_fallthrough; + // Anything else must be a scalar value + default: + // For the first scalar, we will have incremented depth already, so we decrement it here. + logger::log_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + } + + // Now that we've considered the first value, we only increment/decrement for arrays/objects + while (position() < end_position()) { + switch (*return_current_and_advance()) { + case '[': case '{': + logger::log_start_value(*this, "skip"); + _depth++; + break; + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + default: + logger::log_value(*this, "skip", ""); + break; + } + } + + return report_error(TAPE_ERROR, "not enough close braces"); } -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool json_iterator::at_root() const noexcept { + return position() == root_position(); } -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline bool json_iterator::is_single_token() const noexcept { + return parser->implementation->n_structural_indexes == 1; } -simdjson_inline simdjson_result::operator double() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline bool json_iterator::streaming() const noexcept { + return _streaming; } -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline token_position json_iterator::root_position() const noexcept { + return _root; } -simdjson_inline simdjson_result::operator lsx::ondemand::raw_json_string() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); } -simdjson_inline simdjson_result::operator bool() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline void json_iterator::assert_at_root() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument + // has side effects that will be discarded. + SIMDJSON_ASSUME( token.position() == _root ); +#endif } -simdjson_inline simdjson_result::operator lsx::ondemand::value() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { + assert_valid_position(token._position + required_tokens - 1); } -#endif -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { - if (error()) { return error(); } - return first.current_location(); +simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); + SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#else + (void)position; // Suppress unused parameter warning +#endif } -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { - if (error()) { return error(); } - return first.raw_json_token(); +simdjson_inline bool json_iterator::at_end() const noexcept { + return position() == end_position(); +} +simdjson_inline token_position json_iterator::end_position() const noexcept { + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + return &parser->implementation->structural_indexes[n_structural_indexes]; } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); +inline std::string json_iterator::to_string() const noexcept { + if( !is_alive() ) { return "dead json_iterator instance"; } + const char * current_structural = reinterpret_cast(token.peek()); + return std::string("json_iterator [ depth : ") + std::to_string(_depth) + + std::string(", structural : '") + std::string(current_structural,1) + + std::string("', offset : ") + std::to_string(token.current_offset()) + + std::string("', error : ") + error_message(error) + + std::string(" ]"); } -simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { - if (error()) { - return error(); +inline simdjson_result json_iterator::current_location() const noexcept { + if (!is_alive()) { // Unrecoverable error + if (!at_root()) { + return reinterpret_cast(token.peek(-1)); + } else { + return reinterpret_cast(token.peek()); + } } - return first.at_path(json_path); + if (at_end()) { + return OUT_OF_BOUNDS; + } + return reinterpret_cast(token.peek()); } -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H -/* end file simdjson/generic/ondemand/document-inl.h for lsx */ -/* including simdjson/generic/ondemand/document_stream-inl.h for lsx: #include "simdjson/generic/ondemand/document_stream-inl.h" */ -/* begin file simdjson/generic/ondemand/document_stream-inl.h for lsx */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -#include -#include +simdjson_inline bool json_iterator::is_alive() const noexcept { + return parser; +} -namespace simdjson { -namespace lsx { -namespace ondemand { +simdjson_inline void json_iterator::abandon() noexcept { + parser = nullptr; + _depth = 0; +} -#ifdef SIMDJSON_THREADS_ENABLED +simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif // SIMDJSON_CHECK_EOF + return token.return_current_and_advance(); +} -inline void stage1_worker::finish() { - // After calling "run" someone would call finish() to wait - // for the end of the processing. - // This function will wait until either the thread has done - // the processing or, else, the destructor has been called. - std::unique_lock lock(locking_mutex); - cond_var.wait(lock, [this]{return has_work == false;}); +simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { + // deliberately done without safety guard: + return token.peek(); } -inline stage1_worker::~stage1_worker() { - // The thread may never outlive the stage1_worker instance - // and will always be stopped/joined before the stage1_worker - // instance is gone. - stop_thread(); +simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // SIMDJSON_CHECK_EOF + return token.peek(delta); } -inline void stage1_worker::start_thread() { - std::unique_lock lock(locking_mutex); - if(thread.joinable()) { - return; // This should never happen but we never want to create more than one thread. - } - thread = std::thread([this]{ - while(true) { - std::unique_lock thread_lock(locking_mutex); - // We wait for either "run" or "stop_thread" to be called. - cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); - // If, for some reason, the stop_thread() method was called (i.e., the - // destructor of stage1_worker is called, then we want to immediately destroy - // the thread (and not do any more processing). - if(!can_work) { - break; - } - this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, - this->_next_batch_start); - this->has_work = false; - // The condition variable call should be moved after thread_lock.unlock() for performance - // reasons but thread sanitizers may report it as a data race if we do. - // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock - cond_var.notify_one(); // will notify "finish" - thread_lock.unlock(); - } - } - ); +simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // #if SIMDJSON_CHECK_EOF + return token.peek_length(delta); } +simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { + // todo: currently we require end-of-string buffering, but the following + // assert_valid_position should be turned on if/when we lift that condition. + // assert_valid_position(position); + // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF + // is ON by default, we have no choice but to disable it for real with a comment. + return token.peek(position); +} -inline void stage1_worker::stop_thread() { - std::unique_lock lock(locking_mutex); - // We have to make sure that all locks can be released. - can_work = false; - has_work = false; - cond_var.notify_all(); - lock.unlock(); - if(thread.joinable()) { - thread.join(); - } +simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_length(position); +} +simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_root_length(position); } -inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { - std::unique_lock lock(locking_mutex); - owner = ds; - _next_batch_start = next_batch_start; - stage1_thread_parser = stage1; - has_work = true; - // The condition variable call should be moved after thread_lock.unlock() for performance - // reasons but thread sanitizers may report it as a data race if we do. - // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock - cond_var.notify_one(); // will notify the thread lock that we have work - lock.unlock(); +simdjson_inline token_position json_iterator::last_position() const noexcept { + // The following line fails under some compilers... + // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); + // since it has side-effects. + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + SIMDJSON_ASSUME(n_structural_indexes > 0); + return &parser->implementation->structural_indexes[n_structural_indexes - 1]; +} +simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { + return token.peek(last_position()); } -#endif // SIMDJSON_THREADS_ENABLED +simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { + SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); + SIMDJSON_ASSUME(_depth == parent_depth + 1); + _depth = parent_depth; +} -simdjson_inline document_stream::document_stream( - ondemand::parser &_parser, - const uint8_t *_buf, - size_t _len, - size_t _batch_size, - bool _allow_comma_separated -) noexcept - : parser{&_parser}, - buf{_buf}, - len{_len}, - batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? MINIMAL_BATCH_SIZE : _batch_size}, - allow_comma_separated{_allow_comma_separated}, - error{SUCCESS} - #ifdef SIMDJSON_THREADS_ENABLED - , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change - #endif -{ -#ifdef SIMDJSON_THREADS_ENABLED - if(worker.get() == nullptr) { - error = MEMALLOC; - } -#endif +simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); + _depth = child_depth; } -simdjson_inline document_stream::document_stream() noexcept - : parser{nullptr}, - buf{nullptr}, - len{0}, - batch_size{0}, - allow_comma_separated{false}, - error{UNINITIALIZED} - #ifdef SIMDJSON_THREADS_ENABLED - , use_thread(false) - #endif -{ +simdjson_inline depth_t json_iterator::depth() const noexcept { + return _depth; } -simdjson_inline document_stream::~document_stream() noexcept -{ - #ifdef SIMDJSON_THREADS_ENABLED - worker.reset(); - #endif +simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { + return _string_buf_loc; } -inline size_t document_stream::size_in_bytes() const noexcept { - return len; +simdjson_warn_unused simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); + logger::log_error(*this, message); + error = _error; + return error; } -inline size_t document_stream::truncated_bytes() const noexcept { - if(error == CAPACITY) { return len - batch_start; } - return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; +simdjson_inline token_position json_iterator::position() const noexcept { + return token.position(); } -simdjson_inline document_stream::iterator::iterator() noexcept - : stream{nullptr}, finished{true} { +simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape(in, _string_buf_loc, allow_replacement); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else + return parser->unescape(in, _string_buf_loc, allow_replacement); +#endif } -simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept - : stream{_stream}, finished{is_end} { +simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape_wobbly(in, _string_buf_loc); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else + return parser->unescape_wobbly(in, _string_buf_loc); +#endif } -simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { - return simdjson_result(stream->doc, stream->error); +simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); +#if SIMDJSON_DEVELOPMENT_CHECKS +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); + SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); +#endif +#endif + token.set_position(position); + _depth = child_depth; } -simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { - // If there is an error, then we want the iterator - // to be finished, no matter what. (E.g., we do not - // keep generating documents with errors, or go beyond - // a document with errors.) - // - // Users do not have to call "operator*()" when they use operator++, - // so we need to end the stream in the operator++ function. - // - // Note that setting finished = true is essential otherwise - // we would enter an infinite loop. - if (stream->error) { finished = true; } - // Note that stream->error() is guarded against error conditions - // (it will immediately return if stream->error casts to false). - // In effect, this next function does nothing when (stream->error) - // is true (hence the risk of an infinite loop). - stream->next(); - // If that was the last document, we're finished. - // It is the only type of error we do not want to appear - // in operator*. - if (stream->error == EMPTY) { finished = true; } - // If we had any other kind of error (not EMPTY) then we want - // to pass it along to the operator* and we cannot mark the result - // as "finished" just yet. - return *this; +simdjson_warn_unused simdjson_inline error_code json_iterator::consume_character(char c) noexcept { + if (*peek() == c) { + return_current_and_advance(); + return SUCCESS; + } + return TAPE_ERROR; } -simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { - return finished != other.finished; +#if SIMDJSON_DEVELOPMENT_CHECKS + +simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; } -simdjson_inline document_stream::iterator document_stream::begin() noexcept { - start(); - // If there are no documents, we're finished. - return iterator(this, error == EMPTY); +simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } } -simdjson_inline document_stream::iterator document_stream::end() noexcept { - return iterator(this, true); +#endif + + +simdjson_warn_unused simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); + logger::log_error(*this, message); + return _error; } -inline void document_stream::start() noexcept { - if (error) { return; } - error = parser->allocate(batch_size); - if (error) { return; } - // Always run the first stage 1 parse immediately - batch_start = 0; - error = run_stage1(*parser, batch_start); - while(error == EMPTY) { - // In exceptional cases, we may start with an empty block - batch_start = next_batch_start(); - if (batch_start >= len) { return; } - error = run_stage1(*parser, batch_start); - } - if (error) { return; } - doc_index = batch_start; - doc = document(json_iterator(&buf[batch_start], parser)); - doc.iter._streaming = true; - #ifdef SIMDJSON_THREADS_ENABLED - if (use_thread && next_batch_start() < len) { - // Kick off the first thread on next batch if needed - error = stage1_thread_parser.allocate(batch_size); - if (error) { return; } - worker->start_thread(); - start_stage1_thread(); - if (error) { return; } +simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { + // This function is not expected to be called in performance-sensitive settings. + // Let us guard against silly cases: + if((N < max_len) || (N == 0)) { return false; } + // Copy to the buffer. + std::memcpy(tmpbuf, json, max_len); + if(N > max_len) { // We pad whatever remains with ' '. + std::memset(tmpbuf + max_len, ' ', N - max_len); } - #endif // SIMDJSON_THREADS_ENABLED + return true; } -inline void document_stream::next() noexcept { - // We always enter at once once in an error condition. - if (error) { return; } - next_document(); - if (error) { return; } - auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); - doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; +} // namespace ondemand +} // namespace lsx +} // namespace simdjson - // Check if at end of structural indexes (i.e. at end of batch) - if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { - error = EMPTY; - // Load another batch (if available) - while (error == EMPTY) { - batch_start = next_batch_start(); - if (batch_start >= len) { break; } - #ifdef SIMDJSON_THREADS_ENABLED - if(use_thread) { - load_from_stage1_thread(); - } else { - error = run_stage1(*parser, batch_start); - } - #else - error = run_stage1(*parser, batch_start); - #endif - /** - * Whenever we move to another window, we need to update all pointers to make - * it appear as if the input buffer started at the beginning of the window. - * - * Take this input: - * - * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] - * - * Say you process the following window... - * - * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' - * - * When you do so, the json_iterator has a pointer at the beginning of the memory region - * (pointing at the beginning of '{"z"...'. - * - * When you move to the window that starts at... - * - * '[7, 10, 9] [15, 11, 12, 13] ... - * - * then it is not sufficient to just run stage 1. You also need to re-anchor the - * json_iterator so that it believes we are starting at '[7, 10, 9]...'. - * - * Under the DOM front-end, this gets done automatically because the parser owns - * the pointer the data, and when you call stage1 and then stage2 on the same - * parser, then stage2 will run on the pointer acquired by stage1. - * - * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that - * we used. But json_iterator has no callback when stage1 is called on the parser. - * In fact, I think that the parser is unaware of json_iterator. - * - * - * So we need to re-anchor the json_iterator after each call to stage 1 so that - * all of the pointers are in sync. - */ - doc.iter = json_iterator(&buf[batch_start], parser); - doc.iter._streaming = true; - /** - * End of resync. - */ +namespace simdjson { - if (error) { continue; } // If the error was EMPTY, we may want to load another batch. - doc_index = batch_start; +simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::json_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/json_iterator-inl.h for lsx */ +/* including simdjson/generic/ondemand/json_type-inl.h for lsx: #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* begin file simdjson/generic/ondemand/json_type-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace ondemand { + +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { + switch (type) { + case json_type::array: out << "array"; break; + case json_type::object: out << "object"; break; + case json_type::number: out << "number"; break; + case json_type::string: out << "string"; break; + case json_type::boolean: out << "boolean"; break; + case json_type::null: out << "null"; break; + default: SIMDJSON_UNREACHABLE(); } - } + return out; } -inline void document_stream::next_document() noexcept { - // Go to next place where depth=0 (document depth) - error = doc.iter.skip_child(0); - if (error) { return; } - // Always set depth=1 at the start of document - doc.iter._depth = 1; - // consume comma if comma separated is allowed - if (allow_comma_separated) { doc.iter.consume_character(','); } - // Resets the string buffer at the beginning, thus invalidating the strings. - doc.iter._string_buf_loc = parser->string_buf.get(); - doc.iter._root = doc.iter.position(); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { + return out << type.value(); } +#endif -inline size_t document_stream::next_batch_start() const noexcept { - return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; + + +simdjson_inline number_type number::get_number_type() const noexcept { + return type; } -inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { - // This code only updates the structural index in the parser, it does not update any json_iterator - // instance. - size_t remaining = len - _batch_start; - if (remaining <= batch_size) { - return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); - } else { - return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); - } +simdjson_inline bool number::is_uint64() const noexcept { + return get_number_type() == number_type::unsigned_integer; } -simdjson_inline size_t document_stream::iterator::current_index() const noexcept { - return stream->doc_index; +simdjson_inline uint64_t number::get_uint64() const noexcept { + return payload.unsigned_integer; } -simdjson_inline std::string_view document_stream::iterator::source() const noexcept { - auto depth = stream->doc.iter.depth(); - auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); +simdjson_inline number::operator uint64_t() const noexcept { + return get_uint64(); +} - // If at root, process the first token to determine if scalar value - if (stream->doc.iter.at_root()) { - switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { - case '{': case '[': // Depth=1 already at start of document - break; - case '}': case ']': - depth--; - break; - default: // Scalar value document - // TODO: We could remove trailing whitespaces - // This returns a string spanning from start of value to the beginning of the next document (excluded) - { - auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; - // normally the length would be next_index - current_index() - 1, except for the last document - size_t svlen = next_index - current_index(); - const char *start = reinterpret_cast(stream->buf) + current_index(); - while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { - svlen--; - } - return std::string_view(start, svlen); - } - } - cur_struct_index++; - } +simdjson_inline bool number::is_int64() const noexcept { + return get_number_type() == number_type::signed_integer; +} - while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { - switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { - case '{': case '[': - depth++; - break; - case '}': case ']': - depth--; - break; - } - if (depth == 0) { break; } - cur_struct_index++; - } +simdjson_inline int64_t number::get_int64() const noexcept { + return payload.signed_integer; +} - return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; +simdjson_inline number::operator int64_t() const noexcept { + return get_int64(); } -inline error_code document_stream::iterator::error() const noexcept { - return stream->error; +simdjson_inline bool number::is_double() const noexcept { + return get_number_type() == number_type::floating_point_number; } -#ifdef SIMDJSON_THREADS_ENABLED +simdjson_inline double number::get_double() const noexcept { + return payload.floating_point_number; +} -inline void document_stream::load_from_stage1_thread() noexcept { - worker->finish(); - // Swap to the parser that was loaded up in the thread. Make sure the parser has - // enough memory to swap to, as well. - std::swap(stage1_thread_parser,*parser); - error = stage1_thread_error; - if (error) { return; } +simdjson_inline number::operator double() const noexcept { + return get_double(); +} - // If there's anything left, start the stage 1 thread! - if (next_batch_start() < len) { - start_stage1_thread(); +simdjson_inline double number::as_double() const noexcept { + if(is_double()) { + return payload.floating_point_number; + } + if(is_int64()) { + return double(payload.signed_integer); } + return double(payload.unsigned_integer); +} + +simdjson_inline void number::append_s64(int64_t value) noexcept { + payload.signed_integer = value; + type = number_type::signed_integer; } -inline void document_stream::start_stage1_thread() noexcept { - // we call the thread on a lambda that will update - // this->stage1_thread_error - // there is only one thread that may write to this value - // TODO this is NOT exception-safe. - this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error - size_t _next_batch_start = this->next_batch_start(); +simdjson_inline void number::append_u64(uint64_t value) noexcept { + payload.unsigned_integer = value; + type = number_type::unsigned_integer; +} - worker->run(this, & this->stage1_thread_parser, _next_batch_start); +simdjson_inline void number::append_double(double value) noexcept { + payload.floating_point_number = value; + type = number_type::floating_point_number; } -#endif // SIMDJSON_THREADS_ENABLED +simdjson_inline void number::skip_double() noexcept { + type = number_type::floating_point_number; +} } // namespace ondemand } // namespace lsx @@ -108661,588 +133572,719 @@ inline void document_stream::start_stage1_thread() noexcept { namespace simdjson { -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base(error) -{ -} -simdjson_inline simdjson_result::simdjson_result( - lsx::ondemand::document_stream &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ -} +simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::json_type &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} -} +} // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H -/* end file simdjson/generic/ondemand/document_stream-inl.h for lsx */ -/* including simdjson/generic/ondemand/field-inl.h for lsx: #include "simdjson/generic/ondemand/field-inl.h" */ -/* begin file simdjson/generic/ondemand/field-inl.h for lsx */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +/* end file simdjson/generic/ondemand/json_type-inl.h for lsx */ +/* including simdjson/generic/ondemand/logger-inl.h for lsx: #include "simdjson/generic/ondemand/logger-inl.h" */ +/* begin file simdjson/generic/ondemand/logger-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#include +#include + namespace simdjson { namespace lsx { namespace ondemand { +namespace logger { -// clang 6 does not think the default constructor can be noexcept, so we make it explicit -simdjson_inline field::field() noexcept : std::pair() {} +static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; +static constexpr const int LOG_EVENT_LEN = 20; +static constexpr const int LOG_BUFFER_LEN = 30; +static constexpr const int LOG_SMALL_BUFFER_LEN = 10; +static int log_depth = 0; // Not threadsafe. Log only. -simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept - : std::pair(key, std::forward(value)) -{ +// Helper to turn unprintable or newline characters into spaces +static inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } } -simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { - raw_json_string key; - SIMDJSON_TRY( parent_iter.field_key().get(key) ); - SIMDJSON_TRY( parent_iter.field_value() ); - return field::start(parent_iter, key); +template +static inline std::string string_format(const std::string& format, const Args&... args) +{ + SIMDJSON_PUSH_DISABLE_ALL_WARNINGS + int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; + auto size = static_cast(size_s); + if (size <= 0) return std::string(); + std::unique_ptr buf(new char[size]); + std::snprintf(buf.get(), size, format.c_str(), args...); + SIMDJSON_POP_DISABLE_WARNINGS + return std::string(buf.get(), buf.get() + size - 1); } -simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { - return field(key, parent_iter.child()); +static inline log_level get_log_level_from_env() +{ + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + char *lvl = getenv("SIMDJSON_LOG_LEVEL"); + SIMDJSON_POP_DISABLE_WARNINGS + if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } + return log_level::info; } -simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. - simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); - first.consume(); - return answer; +static inline log_level log_threshold() +{ + static log_level threshold = get_log_level_from_env(); + return threshold; } -template -simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { - std::string_view key; - SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); - receiver = key; - return SUCCESS; +static inline bool should_log(log_level level) +{ + return level >= log_threshold(); } -simdjson_inline raw_json_string field::key() const noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. - return first; +inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); } - -simdjson_inline std::string_view field::key_raw_json_token() const noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. - return std::string_view(reinterpret_cast(first.buf-1), second.iter._json_iter->token.peek(-1) - first.buf + 1); +inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "", type, detail, log_level::info); } - -simdjson_inline std::string_view field::escaped_key() const noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. - auto end_quote = second.iter._json_iter->token.peek(-1); - while(*end_quote != '"') end_quote--; - return std::string_view(reinterpret_cast(first.buf), end_quote - first.buf); +inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); } -simdjson_inline value &field::value() & noexcept { - return second; +inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "+", type, detail, log_level::info); + if (LOG_ENABLED) { log_depth++; } } - -simdjson_inline value field::value() && noexcept { - return std::forward(*this).second; +inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_line(iter, "+", type, "", delta, depth_delta, log_level::info); + if (LOG_ENABLED) { log_depth++; } } -} // namespace ondemand -} // namespace lsx -} // namespace simdjson +inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + if (LOG_ENABLED) { log_depth--; } + log_line(iter, "-", type, "", delta, depth_delta, log_level::info); +} -namespace simdjson { +inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); +} +inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { + log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); +} -simdjson_inline simdjson_result::simdjson_result( - lsx::ondemand::field &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ +inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_event(iter.json_iter(), type, detail, delta, depth_delta); } -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base(error) -{ + +inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_value(iter.json_iter(), type, detail, delta, depth_delta); } -simdjson_inline simdjson_result simdjson_result::key() noexcept { - if (error()) { return error(); } - return first.key(); +inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_start_value(iter.json_iter(), type, delta, depth_delta); } -simdjson_inline simdjson_result simdjson_result::key_raw_json_token() noexcept { - if (error()) { return error(); } - return first.key_raw_json_token(); +inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_end_value(iter.json_iter(), type, delta, depth_delta); } -simdjson_inline simdjson_result simdjson_result::escaped_key() noexcept { - if (error()) { return error(); } - return first.escaped_key(); +inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_error(iter.json_iter(), error, detail, delta, depth_delta); } -simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.unescaped_key(allow_replacement); +inline void log_headers() noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(log_level::info))) { + // Technically a static variable is not thread-safe, but if you are using threads and logging... well... + static bool displayed_hint{false}; + log_depth = 0; + printf("\n"); + if (!displayed_hint) { + // We only print this helpful header once. + printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); + printf("# +array says 'this is where we were when we discovered the start array'\n"); + printf( + "# -array says 'this is where we were when we ended the array'\n"); + printf("# skip says 'this is a structural or value I am skipping'\n"); + printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); + printf("#\n"); + printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); + printf("# in addition to the depth being displayed.\n"); + printf("#\n"); + printf("# Every token in the document has a single depth determined by the tokens before it,\n"); + printf("# and is not affected by what the token actually is.\n"); + printf("#\n"); + printf("# Not all structural elements are presented as tokens in the logs.\n"); + printf("#\n"); + printf("# We never give control to the user within an empty array or an empty object.\n"); + printf("#\n"); + printf("# Inside an array, having a depth greater than the array's depth means that\n"); + printf("# we are pointing inside a value.\n"); + printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); + printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); + displayed_hint = true; + } + printf("\n"); + printf("| %-*s ", LOG_EVENT_LEN, "Event"); + printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); + printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); + // printf("| %-*s ", 5, "Next#"); + printf("| %-*s ", 5, "Depth"); + printf("| Detail "); + printf("|\n"); + + printf("|%.*s", LOG_EVENT_LEN + 2, DASHES); + printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); + printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); + // printf("|%.*s", 5+2, DASHES); + printf("|%.*s", 5 + 2, DASHES); + printf("|--------"); + printf("|\n"); + fflush(stdout); + } + } } -template -simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.unescaped_key(receiver, allow_replacement); +template +inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { + log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); } -simdjson_inline simdjson_result simdjson_result::value() noexcept { - if (error()) { return error(); } - return std::move(first.value()); +template +inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(level))) { + const int indent = depth * 2; + const auto buf = iter.token.buf; + auto msg = string_format(title, std::forward(args)...); + printf("| %*s%s%-*s ", indent, "", title_prefix, + LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); + { + // Print the current structural. + printf("| "); + // Before we begin, the index might point right before the document. + // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 + if (index < iter._root) { + printf("%*s", LOG_BUFFER_LEN, ""); + } else { + auto current_structural = &buf[*index]; + for (int i = 0; i < LOG_BUFFER_LEN; i++) { + printf("%c", printable_char(current_structural[i])); + } + } + printf(" "); + } + { + // Print the next structural. + printf("| "); + auto next_structural = &buf[*(index + 1)]; + for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { + printf("%c", printable_char(next_structural[i])); + } + printf(" "); + } + // printf("| %5u ", *(index+1)); + printf("| %5i ", depth); + printf("| %6.*s ", int(detail.size()), detail.data()); + printf("|\n"); + fflush(stdout); + } + } } +} // namespace logger +} // namespace ondemand +} // namespace lsx } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H -/* end file simdjson/generic/ondemand/field-inl.h for lsx */ -/* including simdjson/generic/ondemand/json_iterator-inl.h for lsx: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/json_iterator-inl.h for lsx */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +/* end file simdjson/generic/ondemand/logger-inl.h for lsx */ +/* including simdjson/generic/ondemand/object-inl.h for lsx: #include "simdjson/generic/ondemand/object-inl.h" */ +/* begin file simdjson/generic/ondemand/object-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/jsonpathutil.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_string_builder.h" // for constevalutil::fixed_string */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace lsx { namespace ondemand { -simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept - : token(std::forward(other.token)), - parser{other.parser}, - _string_buf_loc{other._string_buf_loc}, - error{other.error}, - _depth{other._depth}, - _root{other._root}, - _streaming{other._streaming} -{ - other.parser = nullptr; -} -simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { - token = other.token; - parser = other.parser; - _string_buf_loc = other._string_buf_loc; - error = other.error; - _depth = other._depth; - _root = other._root; - _streaming = other._streaming; - other.parser = nullptr; - return *this; +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); } - -simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept - : token(buf, &_parser->implementation->structural_indexes[0]), - parser{_parser}, - _string_buf_loc{parser->string_buf.get()}, - _depth{1}, - _root{parser->implementation->structural_indexes.get()}, - _streaming{false} - -{ - logger::log_headers(); -#if SIMDJSON_CHECK_EOF - assert_more_tokens(); -#endif +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); } - -#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept - : token(buf, &_parser->implementation->structural_indexes[0]), - parser{_parser}, - _string_buf_loc{parser->string_buf.get()}, - _depth{1}, - _root{parser->implementation->structural_indexes.get()}, - _streaming{streaming} - -{ - logger::log_headers(); -#if SIMDJSON_CHECK_EOF - assert_more_tokens(); -#endif +simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { + return find_field_unordered(key); } -#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON - -inline void json_iterator::rewind() noexcept { - token.set_position( root_position() ); - logger::log_headers(); // We start again - _string_buf_loc = parser->string_buf.get(); - _depth = 1; +simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { + return std::forward(*this).find_field_unordered(key); } - -inline bool json_iterator::balanced() const noexcept { - token_iterator ti(token); - int32_t count{0}; - ti.set_position( root_position() ); - while(ti.peek() <= peek_last()) { - switch (*ti.return_current_and_advance()) - { - case '[': case '{': - count++; - break; - case ']': case '}': - count--; - break; - default: - break; - } +simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; } - return count == 0; + return value(iter.child()); } - - -// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller -// relating depth and parent_depth, which is a desired effect. The warning does not show up if the -// skip_child() function is not marked inline). -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { - if (depth() <= parent_depth) { return SUCCESS; } - switch (*return_current_and_advance()) { - // TODO consider whether matching braces is a requirement: if non-matching braces indicates - // *missing* braces, then future lookups are not in the object/arrays they think they are, - // violating the rule "validate enough structure that the user can be confident they are - // looking at the right values." - // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth - - // For the first open array/object in a value, we've already incremented depth, so keep it the same - // We never stop at colon, but if we did, it wouldn't affect depth - case '[': case '{': case ':': - logger::log_start_value(*this, "skip"); - break; - // If there is a comma, we have just finished a value in an array/object, and need to get back in - case ',': - logger::log_value(*this, "skip"); - break; - // ] or } means we just finished a value and need to jump out of the array/object - case ']': case '}': - logger::log_end_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } -#if SIMDJSON_CHECK_EOF - // If there are no more tokens, the parent is incomplete. - if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } -#endif // SIMDJSON_CHECK_EOF - break; - case '"': - if(*peek() == ':') { - // We are at a key!!! - // This might happen if you just started an object and you skip it immediately. - // Performance note: it would be nice to get rid of this check as it is somewhat - // expensive. - // https://github.com/simdjson/simdjson/issues/1742 - logger::log_value(*this, "key"); - return_current_and_advance(); // eat up the ':' - break; // important!!! - } - simdjson_fallthrough; - // Anything else must be a scalar value - default: - // For the first scalar, we will have incremented depth already, so we decrement it here. - logger::log_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } - break; - } - - // Now that we've considered the first value, we only increment/decrement for arrays/objects - while (position() < end_position()) { - switch (*return_current_and_advance()) { - case '[': case '{': - logger::log_start_value(*this, "skip"); - _depth++; - break; - // TODO consider whether matching braces is a requirement: if non-matching braces indicates - // *missing* braces, then future lookups are not in the object/arrays they think they are, - // violating the rule "validate enough structure that the user can be confident they are - // looking at the right values." - // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth - case ']': case '}': - logger::log_end_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } - break; - default: - logger::log_value(*this, "skip", ""); - break; - } +simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; } - - return report_error(TAPE_ERROR, "not enough close braces"); + return value(iter.child()); } -SIMDJSON_POP_DISABLE_WARNINGS - -simdjson_inline bool json_iterator::at_root() const noexcept { - return position() == root_position(); +simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_object().error() ); + return object(iter); } - -simdjson_inline bool json_iterator::is_single_token() const noexcept { - return parser->implementation->n_structural_indexes == 1; +simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_root_object().error() ); + return object(iter); } - -simdjson_inline bool json_iterator::streaming() const noexcept { - return _streaming; +simdjson_warn_unused simdjson_inline error_code object::consume() noexcept { + if(iter.is_at_key()) { + /** + * whenever you are pointing at a key, calling skip_child() is + * unsafe because you will hit a string and you will assume that + * it is string value, and this mistake will lead you to make bad + * depth computation. + */ + /** + * We want to 'consume' the key. We could really + * just do _json_iter->return_current_and_advance(); at this + * point, but, for clarity, we will use the high-level API to + * eat the key. We assume that the compiler optimizes away + * most of the work. + */ + simdjson_unused raw_json_string actual_key; + auto error = iter.field_key().get(actual_key); + if (error) { iter.abandon(); return error; }; + // Let us move to the value while we are at it. + if ((error = iter.field_value())) { iter.abandon(); return error; } + } + auto error_skip = iter.json_iter().skip_child(iter.depth()-1); + if(error_skip) { iter.abandon(); } + return error_skip; } -simdjson_inline token_position json_iterator::root_position() const noexcept { - return _root; +simdjson_inline simdjson_result object::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + const uint8_t * final_point{iter._json_iter->peek()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } -simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { - SIMDJSON_ASSUME( _depth == 1 ); +simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.started_object().error() ); + return object(iter); } -simdjson_inline void json_iterator::assert_at_root() const noexcept { - SIMDJSON_ASSUME( _depth == 1 ); -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument - // has side effects that will be discarded. - SIMDJSON_ASSUME( token.position() == _root ); -#endif +simdjson_inline object object::resume(const value_iterator &iter) noexcept { + return iter; } -simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { - assert_valid_position(token._position + required_tokens - 1); +simdjson_inline object::object(const value_iterator &_iter) noexcept + : iter{_iter} +{ } -simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); - SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +simdjson_inline simdjson_result object::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } #endif + return object_iterator(iter); } - -simdjson_inline bool json_iterator::at_end() const noexcept { - return position() == end_position(); +simdjson_inline simdjson_result object::end() noexcept { + return object_iterator(iter); } -simdjson_inline token_position json_iterator::end_position() const noexcept { - uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; - return &parser->implementation->structural_indexes[n_structural_indexes]; + +inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result child; + + // If there is an escape character in the key, unescape it and then get the child. + size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = find_field(unescaped); // Take note find_field does not unescape keys when matching + } else { + child = find_field(key); + } + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; } -inline std::string json_iterator::to_string() const noexcept { - if( !is_alive() ) { return "dead json_iterator instance"; } - const char * current_structural = reinterpret_cast(token.peek()); - return std::string("json_iterator [ depth : ") + std::to_string(_depth) - + std::string(", structural : '") + std::string(current_structural,1) - + std::string("', offset : ") + std::to_string(token.current_offset()) - + std::string("', error : ") + error_message(error) - + std::string(" ]"); +inline simdjson_result object::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { + return INVALID_JSON_POINTER; + } + return at_pointer(json_pointer); } -inline simdjson_result json_iterator::current_location() const noexcept { - if (!is_alive()) { // Unrecoverable error - if (!at_root()) { - return reinterpret_cast(token.peek(-1)); +inline simdjson_result> object::at_path_with_wildcard(std::string_view json_path) noexcept { + std::vector result; + + auto result_pair = get_next_key_and_json_path(json_path); + std::string_view key = result_pair.first; + std::string_view remaining_path = result_pair.second; + // Handle when its the case for wildcard + if (key == "*") { + // Loop through each field in the object + for (auto field : *this) { + value val; + SIMDJSON_TRY(field.value().get(val)); + + if (remaining_path.empty()) { + result.push_back(std::move(val)); + } else { + auto nested_result = val.at_path_with_wildcard(remaining_path); + + if (nested_result.error()) { + return nested_result.error(); + } + // Extract and append all nested matches to our result + std::vector nested_vec; + SIMDJSON_TRY(std::move(nested_result).get(nested_vec)); + + result.insert(result.end(), + std::make_move_iterator(nested_vec.begin()), + std::make_move_iterator(nested_vec.end())); + } + } + return result; + } else { + value val; + SIMDJSON_TRY(find_field(key).get(val)); + + if (remaining_path.empty()) { + result.push_back(std::move(val)); + return result; } else { - return reinterpret_cast(token.peek()); + return val.at_path_with_wildcard(remaining_path); } } - if (at_end()) { - return OUT_OF_BOUNDS; - } - return reinterpret_cast(token.peek()); } -simdjson_inline bool json_iterator::is_alive() const noexcept { - return parser; +simdjson_inline simdjson_result object::count_fields() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the object after counting the number of elements. + iter.reset_object(); + return count; } -simdjson_inline void json_iterator::abandon() noexcept { - parser = nullptr; - _depth = 0; +simdjson_inline simdjson_result object::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_object().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; } -simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { -#if SIMDJSON_CHECK_EOF - assert_more_tokens(); -#endif // SIMDJSON_CHECK_EOF - return token.return_current_and_advance(); +simdjson_inline simdjson_result object::reset() & noexcept { + return iter.reset_object(); } -simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { - // deliberately done without safety guard: - return token.peek(); -} +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION -simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_more_tokens(delta+1); -#endif // SIMDJSON_CHECK_EOF - return token.peek(delta); -} +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code object::extract_into(T& out) & noexcept { + // Helper to check if a field name matches any of the requested fields + auto should_extract = [](std::string_view field_name) constexpr -> bool { + return ((FieldNames.view() == field_name) || ...); + }; -simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_more_tokens(delta+1); -#endif // #if SIMDJSON_CHECK_EOF - return token.peek_length(delta); -} + // Iterate through all members of T using reflection + template for (constexpr auto mem : std::define_static_array( + std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + + // Only extract this field if it's in our list of requested fields + if constexpr (should_extract(key)) { + // Try to find and extract the field + if constexpr (concepts::optional_type) { + // For optional fields, it's ok if they're missing + auto field_result = find_field_unordered(key); + if (!field_result.error()) { + auto error = field_result.get(out.[:mem:]); + if (error && error != NO_SUCH_FIELD) { + return error; + } + } else if (field_result.error() != NO_SUCH_FIELD) { + return field_result.error(); + } else { + out.[:mem:].reset(); + } + } else { + // For required fields (in the requested list), fail if missing + SIMDJSON_TRY((*this)[key].get(out.[:mem:])); + } + } + } + }; -simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { - // todo: currently we require end-of-string buffering, but the following - // assert_valid_position should be turned on if/when we lift that condition. - // assert_valid_position(position); - // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF - // is ON by default, we have no choice but to disable it for real with a comment. - return token.peek(position); + return SUCCESS; } -simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_valid_position(position); -#endif // SIMDJSON_CHECK_EOF - return token.peek_length(position); +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); } -simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_valid_position(position); -#endif // SIMDJSON_CHECK_EOF - return token.peek_root_length(position); +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); } - -simdjson_inline token_position json_iterator::last_position() const noexcept { - // The following line fails under some compilers... - // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); - // since it has side-effects. - uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; - SIMDJSON_ASSUME(n_structural_indexes > 0); - return &parser->implementation->structural_indexes[n_structural_indexes - 1]; +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); } -simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { - return token.peek(last_position()); +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field_unordered(key); } - -simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { - SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); - SIMDJSON_ASSUME(_depth == parent_depth + 1); - _depth = parent_depth; +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; } - -simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { - SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); - SIMDJSON_ASSUME(_depth == child_depth - 1); - _depth = child_depth; +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first)[key]; } - -simdjson_inline depth_t json_iterator::depth() const noexcept { - return _depth; +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field(key); } -simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { - return _string_buf_loc; +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); } -simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { - SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); - logger::log_error(*this, message); - error = _error; - return error; +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); } -simdjson_inline token_position json_iterator::position() const noexcept { - return token.position(); +simdjson_inline simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path_with_wildcard(json_path); } -simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { -#if SIMDJSON_DEVELOPMENT_CHECKS - auto result = parser->unescape(in, _string_buf_loc, allow_replacement); - SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); - return result; -#else - return parser->unescape(in, _string_buf_loc, allow_replacement); -#endif +inline simdjson_result simdjson_result::reset() noexcept { + if (error()) { return error(); } + return first.reset(); } -simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { -#if SIMDJSON_DEVELOPMENT_CHECKS - auto result = parser->unescape_wobbly(in, _string_buf_loc); - SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); - return result; -#else - return parser->unescape_wobbly(in, _string_buf_loc); -#endif +inline simdjson_result simdjson_result::is_empty() noexcept { + if (error()) { return error(); } + return first.is_empty(); } -simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { - SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); - SIMDJSON_ASSUME(_depth == child_depth - 1); -#if SIMDJSON_DEVELOPMENT_CHECKS -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); - SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); -#endif -#endif - token.set_position(position); - _depth = child_depth; +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); } -simdjson_inline error_code json_iterator::consume_character(char c) noexcept { - if (*peek() == c) { - return_current_and_advance(); - return SUCCESS; - } - return TAPE_ERROR; +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); } -#if SIMDJSON_DEVELOPMENT_CHECKS +} // namespace simdjson -simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { - SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); - return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; -} +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +/* end file simdjson/generic/ondemand/object-inl.h for lsx */ +/* including simdjson/generic/ondemand/object_iterator-inl.h for lsx: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/object_iterator-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H -simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { - SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); - if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } -} +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#endif +namespace simdjson { +namespace lsx { +namespace ondemand { +// +// object_iterator +// -simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { - SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); - logger::log_error(*this, message); - return _error; +simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result object_iterator::operator*() noexcept { + error_code error = iter.error(); + if (error) { iter.abandon(); return error; } + auto result = field::start(iter); + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (result.error()) { iter.abandon(); } + return result; +} +simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { + return iter.is_open(); } +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline object_iterator &object_iterator::operator++() noexcept { + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error -simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { - // This function is not expected to be called in performance-sensitive settings. - // Let us guard against silly cases: - if((N < max_len) || (N == 0)) { return false; } - // Copy to the buffer. - std::memcpy(tmpbuf, json, max_len); - if(N > max_len) { // We pad whatever remains with ' '. - std::memset(tmpbuf + max_len, ' ', N - max_len); - } - return true; + simdjson_unused error_code error; + if ((error = iter.skip_child() )) { return *this; } + + simdjson_unused bool has_value; + if ((error = iter.has_next_field().get(has_value) )) { return *this; }; + return *this; } +SIMDJSON_POP_DISABLE_WARNINGS + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the object is first found and the iterator is just past the {. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the , or } before the next value. In this state, +// depth == iter.depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter.depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the object iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an +// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter.depth == depth, and at_start == false. +// +// Errors that occur while reading a field to give to the user (such as when the key is not a +// string or the field is missing a colon) are yielded immediately. Depth is then decremented, +// moving to the Finished state without transitioning through an Error state at all. +// +// ## Terminal State +// +// The terminal state has iter.depth < depth. at_start is always false. +// +// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. +// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. +// } // namespace ondemand } // namespace lsx @@ -109250,1516 +134292,1923 @@ simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const ui namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::json_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result( + lsx::ondemand::object_iterator &&value +) noexcept + : implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +// Checks for ']' and ',' +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++first; + return *this; +} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/json_iterator-inl.h for lsx */ -/* including simdjson/generic/ondemand/json_type-inl.h for lsx: #include "simdjson/generic/ondemand/json_type-inl.h" */ -/* begin file simdjson/generic/ondemand/json_type-inl.h for lsx */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/object_iterator-inl.h for lsx */ +/* including simdjson/generic/ondemand/parser-inl.h for lsx: #include "simdjson/generic/ondemand/parser-inl.h" */ +/* begin file simdjson/generic/ondemand/parser-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace lsx { namespace ondemand { -inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { - switch (type) { - case json_type::array: out << "array"; break; - case json_type::object: out << "object"; break; - case json_type::number: out << "number"; break; - case json_type::string: out << "string"; break; - case json_type::boolean: out << "boolean"; break; - case json_type::null: out << "null"; break; - default: SIMDJSON_UNREACHABLE(); - } - return out; +simdjson_inline parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity} { } -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { - return out << type.value(); +simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { + if (new_capacity > max_capacity()) { return CAPACITY; } + if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + + // string_capacity copied from document::allocate + _capacity = 0; + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); +#if SIMDJSON_DEVELOPMENT_CHECKS + start_positions.reset(new (std::nothrow) token_position[new_max_depth]); +#endif + if (implementation) { + SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); + SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + } else { + SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); + } + _capacity = new_capacity; + _max_depth = new_max_depth; + return SUCCESS; +} +#if SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { + return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); } #endif +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } + json.remove_utf8_bom(); -simdjson_inline number_type number::get_number_type() const noexcept { - return type; + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return document::start({ reinterpret_cast(json.data()), this }); } -simdjson_inline bool number::is_uint64() const noexcept { - return get_number_type() == number_type::unsigned_integer; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); + if (err) { + if (err != UNCLOSED_STRING) + return err; + } + return document::start({ reinterpret_cast(json.data()), this, true }); } +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -simdjson_inline uint64_t number::get_uint64() const noexcept { - return payload.unsigned_integer; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); } -simdjson_inline number::operator uint64_t() const noexcept { - return get_uint64(); +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); } -simdjson_inline bool number::is_int64() const noexcept { - return get_number_type() == number_type::signed_integer; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { + return iterate(padded_string_view(json, allocated)); } -simdjson_inline int64_t number::get_int64() const noexcept { - return payload.signed_integer; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { + return iterate(pad_with_reserve(json)); } -simdjson_inline number::operator int64_t() const noexcept { - return get_int64(); +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { + return iterate(padded_string_view(json)); } -simdjson_inline bool number::is_double() const noexcept { - return get_number_type() == number_type::floating_point_number; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + padded_string_view json = result.value_unsafe(); + return iterate(json); } -simdjson_inline double number::get_double() const noexcept { - return payload.floating_point_number; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + const padded_string &json = result.value_unsafe(); + return iterate(json); } -simdjson_inline number::operator double() const noexcept { - return get_double(); +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length()) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return json_iterator(reinterpret_cast(json.data()), this); } -simdjson_inline double number::as_double() const noexcept { - if(is_double()) { - return payload.floating_point_number; +inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { + buf += 3; + len -= 3; } - if(is_int64()) { - return double(payload.signed_integer); + if(allow_comma_separated && batch_size < len) { batch_size = len; } + return document_stream(*this, buf, len, batch_size, allow_comma_separated); +} + +inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. + return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(padded_string_view s, size_t batch_size, bool allow_comma_separated) noexcept { + if (!s.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(pad(s), batch_size, allow_comma_separated); +} +simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { + return _capacity; +} +simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; +} +simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { + return _max_depth; +} + +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; } - return double(payload.unsigned_integer); } -simdjson_inline void number::append_s64(int64_t value) noexcept { - payload.signed_integer = value; - type = number_type::signed_integer; +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { + uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; } -simdjson_inline void number::append_u64(uint64_t value) noexcept { - payload.unsigned_integer = value; - type = number_type::unsigned_integer; +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { + uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; } -simdjson_inline void number::append_double(double value) noexcept { - payload.floating_point_number = value; - type = number_type::floating_point_number; +simdjson_inline simdjson_warn_unused ondemand::parser& parser::get_parser() { + return *parser::get_parser_instance(); } -simdjson_inline void number::skip_double() noexcept { - type = number_type::floating_point_number; +simdjson_inline bool release_parser() { + auto &parser_instance = parser::get_threadlocal_parser_if_exists(); + if (parser_instance) { + parser_instance.reset(); + return true; + } + return false; +} + +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_parser_instance() { + std::unique_ptr& parser_instance = get_threadlocal_parser_if_exists(); + if (!parser_instance) { + parser_instance.reset(new ondemand::parser()); + } + return parser_instance; } +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_threadlocal_parser_if_exists() { + // @the-moisrex points out that this could be implemented with std::optional (C++17). + thread_local std::unique_ptr parser_instance = nullptr; + return parser_instance; +} + + } // namespace ondemand } // namespace lsx } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::json_type &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H -/* end file simdjson/generic/ondemand/json_type-inl.h for lsx */ -/* including simdjson/generic/ondemand/logger-inl.h for lsx: #include "simdjson/generic/ondemand/logger-inl.h" */ -/* begin file simdjson/generic/ondemand/logger-inl.h for lsx */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H +/* end file simdjson/generic/ondemand/parser-inl.h for lsx */ +/* including simdjson/generic/ondemand/raw_json_string-inl.h for lsx: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include -#include - namespace simdjson { + namespace lsx { namespace ondemand { -namespace logger { - -static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; -static constexpr const int LOG_EVENT_LEN = 20; -static constexpr const int LOG_BUFFER_LEN = 30; -static constexpr const int LOG_SMALL_BUFFER_LEN = 10; -static int log_depth = 0; // Not threadsafe. Log only. - -// Helper to turn unprintable or newline characters into spaces -static inline char printable_char(char c) { - if (c >= 0x20) { - return c; - } else { - return ' '; - } -} - -template -static inline std::string string_format(const std::string& format, const Args&... args) -{ - SIMDJSON_PUSH_DISABLE_ALL_WARNINGS - int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; - auto size = static_cast(size_s); - if (size <= 0) return std::string(); - std::unique_ptr buf(new char[size]); - std::snprintf(buf.get(), size, format.c_str(), args...); - SIMDJSON_POP_DISABLE_WARNINGS - return std::string(buf.get(), buf.get() + size - 1); -} - -static inline log_level get_log_level_from_env() -{ - SIMDJSON_PUSH_DISABLE_WARNINGS - SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe - char *lvl = getenv("SIMDJSON_LOG_LEVEL"); - SIMDJSON_POP_DISABLE_WARNINGS - if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } - return log_level::info; -} -static inline log_level log_threshold() -{ - static log_level threshold = get_log_level_from_env(); - return threshold; -} - -static inline bool should_log(log_level level) -{ - return level >= log_threshold(); -} +simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} -inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +simdjson_inline const char * raw_json_string::raw() const noexcept { + return reinterpret_cast(buf); } -inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { - log_line(iter, index, depth, "", type, detail, log_level::info); -} -inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +simdjson_inline char raw_json_string::operator[](size_t i) const noexcept { + return reinterpret_cast(buf)[i]; } -inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { - log_line(iter, index, depth, "+", type, detail, log_level::info); - if (LOG_ENABLED) { log_depth++; } -} -inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_line(iter, "+", type, "", delta, depth_delta, log_level::info); - if (LOG_ENABLED) { log_depth++; } +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { + size_t pos{0}; + while(pos < target.size()) { + pos = target.find('"', pos); + if(pos == std::string_view::npos) { return true; } + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } + } + pos++; + } + return true; } -inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - if (LOG_ENABLED) { log_depth--; } - log_line(iter, "-", type, "", delta, depth_delta, log_level::info); +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { + size_t pos{0}; + while(target[pos]) { + const char * result = strchr(target+pos, '"'); + if(result == nullptr) { return true; } + pos = result - target; + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } + } + pos++; + } + return true; } -inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { - log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); -} -inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { - log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); -} -inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_event(iter.json_iter(), type, detail, delta, depth_delta); +simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { + // If we are going to call memcmp, then we must know something about the length of the raw_json_string. + return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); } -inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_value(iter.json_iter(), type, detail, delta, depth_delta); +simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { + // Assumptions: does not contain unescaped quote characters("), and + // the raw content is quote terminated within a valid JSON string. + if(target.size() <= SIMDJSON_PADDING) { + return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + } + const char * r{raw()}; + size_t pos{0}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; } -inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_start_value(iter.json_iter(), type, delta, depth_delta); +simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; } -inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_end_value(iter.json_iter(), type, delta, depth_delta); -} -inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { - log_error(iter.json_iter(), error, detail, delta, depth_delta); +simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { + // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; } -inline void log_headers() noexcept { - if (LOG_ENABLED) { - if (simdjson_unlikely(should_log(log_level::info))) { - // Technically a static variable is not thread-safe, but if you are using threads and logging... well... - static bool displayed_hint{false}; - log_depth = 0; - printf("\n"); - if (!displayed_hint) { - // We only print this helpful header once. - printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); - printf("# +array says 'this is where we were when we discovered the start array'\n"); - printf( - "# -array says 'this is where we were when we ended the array'\n"); - printf("# skip says 'this is a structural or value I am skipping'\n"); - printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); - printf("#\n"); - printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); - printf("# in addition to the depth being displayed.\n"); - printf("#\n"); - printf("# Every token in the document has a single depth determined by the tokens before it,\n"); - printf("# and is not affected by what the token actually is.\n"); - printf("#\n"); - printf("# Not all structural elements are presented as tokens in the logs.\n"); - printf("#\n"); - printf("# We never give control to the user within an empty array or an empty object.\n"); - printf("#\n"); - printf("# Inside an array, having a depth greater than the array's depth means that\n"); - printf("# we are pointing inside a value.\n"); - printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); - printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); - displayed_hint = true; - } - printf("\n"); - printf("| %-*s ", LOG_EVENT_LEN, "Event"); - printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); - printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); - // printf("| %-*s ", 5, "Next#"); - printf("| %-*s ", 5, "Depth"); - printf("| Detail "); - printf("|\n"); - - printf("|%.*s", LOG_EVENT_LEN + 2, DASHES); - printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); - printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); - // printf("|%.*s", 5+2, DASHES); - printf("|%.*s", 5 + 2, DASHES); - printf("|--------"); - printf("|\n"); - fflush(stdout); +simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; } } + if(r[pos] != '"') { return false; } + return true; } -template -inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { - log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); -} - -template -inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept { - if (LOG_ENABLED) { - if (simdjson_unlikely(should_log(level))) { - const int indent = depth * 2; - const auto buf = iter.token.buf; - auto msg = string_format(title, std::forward(args)...); - printf("| %*s%s%-*s ", indent, "", title_prefix, - LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); - { - // Print the current structural. - printf("| "); - // Before we begin, the index might point right before the document. - // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 - if (index < iter._root) { - printf("%*s", LOG_BUFFER_LEN, ""); - } else { - auto current_structural = &buf[*index]; - for (int i = 0; i < LOG_BUFFER_LEN; i++) { - printf("%c", printable_char(current_structural[i])); - } - } - printf(" "); - } - { - // Print the next structural. - printf("| "); - auto next_structural = &buf[*(index + 1)]; - for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { - printf("%c", printable_char(next_structural[i])); - } - printf(" "); - } - // printf("| %5u ", *(index+1)); - printf("| %5i ", depth); - printf("| %6.*s ", int(detail.size()), detail.data()); - printf("|\n"); - fflush(stdout); +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { + return a.unsafe_is_equal(c); +} + +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { + return a == c; +} + +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { + return !(a == c); +} + +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { + return !(a == c); +} + + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { + return iter.unescape(*this, allow_replacement); +} + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { + return iter.unescape_wobbly(*this); +} + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { + bool in_escape = false; + const char *s = str.raw(); + while (true) { + switch (*s) { + case '\\': in_escape = !in_escape; break; + case '"': if (in_escape) { in_escape = false; } else { return out; } break; + default: if (in_escape) { in_escape = false; } } + out << *s; + s++; } } -} // namespace logger } // namespace ondemand } // namespace lsx } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H -/* end file simdjson/generic/ondemand/logger-inl.h for lsx */ -/* including simdjson/generic/ondemand/object-inl.h for lsx: #include "simdjson/generic/ondemand/object-inl.h" */ -/* begin file simdjson/generic/ondemand/object-inl.h for lsx */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::raw() const noexcept { + if (error()) { return error(); } + return first.raw(); +} +simdjson_inline char simdjson_result::operator[](size_t i) const noexcept { + if (error()) { return error(); } + return first[i]; +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(lsx::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { + if (error()) { return error(); } + return first.unescape(iter, allow_replacement); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(lsx::ondemand::json_iterator &iter) const noexcept { + if (error()) { return error(); } + return first.unescape_wobbly(iter); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H +/* end file simdjson/generic/ondemand/raw_json_string-inl.h for lsx */ +/* including simdjson/generic/ondemand/serialization-inl.h for lsx: #include "simdjson/generic/ondemand/serialization-inl.h" */ +/* begin file simdjson/generic/ondemand/serialization-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_builder.h" */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace lsx { -namespace ondemand { -simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; - } - return value(iter.child()); -} -simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; - } - return value(iter.child()); -} -simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { - return find_field_unordered(key); +inline std::string_view trim(const std::string_view str) noexcept { + // We can almost surely do better by rolling our own find_first_not_of function. + size_t first = str.find_first_not_of(" \t\n\r"); + // If we have the empty string (just white space), then no trimming is possible, and + // we return the empty string_view. + if (std::string_view::npos == first) { return std::string_view(); } + size_t last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, (last - first + 1)); } -simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { - return std::forward(*this).find_field_unordered(key); + + +inline simdjson_result to_json_string(lsx::ondemand::document& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; - } - return value(iter.child()); + +inline simdjson_result to_json_string(lsx::ondemand::document_reference& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; + +inline simdjson_result to_json_string(lsx::ondemand::value& x) noexcept { + /** + * If we somehow receive a value that has already been consumed, + * then the following code could be in trouble. E.g., we create + * an array as needed, but if an array was already created, then + * it could be bad. + */ + using namespace lsx::ondemand; + lsx::ondemand::json_type t; + auto error = x.type().get(t); + if(error != SUCCESS) { return error; } + switch (t) + { + case json_type::array: + { + lsx::ondemand::array array; + error = x.get_array().get(array); + if(error) { return error; } + return to_json_string(array); + } + case json_type::object: + { + lsx::ondemand::object object; + error = x.get_object().get(object); + if(error) { return error; } + return to_json_string(object); + } + default: + return trim(x.raw_json_token()); } - return value(iter.child()); } -simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { - SIMDJSON_TRY( iter.start_object().error() ); - return object(iter); +inline simdjson_result to_json_string(lsx::ondemand::object& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { - SIMDJSON_TRY( iter.start_root_object().error() ); - return object(iter); + +inline simdjson_result to_json_string(lsx::ondemand::array& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline error_code object::consume() noexcept { - if(iter.is_at_key()) { - /** - * whenever you are pointing at a key, calling skip_child() is - * unsafe because you will hit a string and you will assume that - * it is string value, and this mistake will lead you to make bad - * depth computation. - */ - /** - * We want to 'consume' the key. We could really - * just do _json_iter->return_current_and_advance(); at this - * point, but, for clarity, we will use the high-level API to - * eat the key. We assume that the compiler optimizes away - * most of the work. - */ - simdjson_unused raw_json_string actual_key; - auto error = iter.field_key().get(actual_key); - if (error) { iter.abandon(); return error; }; - // Let us move to the value while we are at it. - if ((error = iter.field_value())) { iter.abandon(); return error; } - } - auto error_skip = iter.json_iter().skip_child(iter.depth()-1); - if(error_skip) { iter.abandon(); } - return error_skip; + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline simdjson_result object::raw_json() noexcept { - const uint8_t * starting_point{iter.peek_start()}; - auto error = consume(); - if(error) { return error; } - const uint8_t * final_point{iter._json_iter->peek()}; - return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { - SIMDJSON_TRY( iter.started_object().error() ); - return object(iter); +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline object object::resume(const value_iterator &iter) noexcept { - return iter; +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline object::object(const value_iterator &_iter) noexcept - : iter{_iter} -{ +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } +} // namespace simdjson -simdjson_inline simdjson_result object::begin() noexcept { -#if SIMDJSON_DEVELOPMENT_CHECKS - if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } -#endif - return object_iterator(iter); +namespace simdjson { namespace lsx { namespace ondemand { + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } } -simdjson_inline simdjson_result object::end() noexcept { - return object_iterator(iter); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } } +#endif -inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { - if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } - json_pointer = json_pointer.substr(1); - size_t slash = json_pointer.find('/'); - std::string_view key = json_pointer.substr(0, slash); - // Grab the child with the given key - simdjson_result child; +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif - // If there is an escape character in the key, unescape it and then get the child. - size_t escape = key.find('~'); - if (escape != std::string_view::npos) { - // Unescape the key - std::string unescaped(key); - do { - switch (unescaped[escape+1]) { - case '0': - unescaped.replace(escape, 2, "~"); - break; - case '1': - unescaped.replace(escape, 2, "/"); - break; - default: - return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); - } - escape = unescaped.find('~', escape+1); - } while (escape != std::string::npos); - child = find_field(unescaped); // Take note find_field does not unescape keys when matching +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); } else { - child = find_field(key); + throw simdjson::simdjson_error(error); } - if(child.error()) { - return child; // we do not continue if there was an error +} +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::document_reference& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); } - // If there is a /, we have to recurse and look up more of the path - if (slash != std::string_view::npos) { - child = child.at_pointer(json_pointer.substr(slash)); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); } - return child; } +#endif -inline simdjson_result object::at_path(std::string_view json_path) noexcept { - auto json_pointer = json_path_to_pointer_conversion(json_path); - if (json_pointer == "-1") { - return INVALID_JSON_POINTER; +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); } - return at_pointer(json_pointer); } - -simdjson_inline simdjson_result object::count_fields() & noexcept { - size_t count{0}; - // Important: we do not consume any of the values. - for(simdjson_unused auto v : *this) { count++; } - // The above loop will always succeed, but we want to report errors. - if(iter.error()) { return iter.error(); } - // We need to move back at the start because we expect users to iterate through - // the object after counting the number of elements. - iter.reset_object(); - return count; +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); } - -simdjson_inline simdjson_result object::is_empty() & noexcept { - bool is_not_empty; - auto error = iter.reset_object().get(is_not_empty); - if(error) { return error; } - return !is_not_empty; +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } } +#endif +}}} // namespace simdjson::lsx::ondemand -simdjson_inline simdjson_result object::reset() & noexcept { - return iter.reset_object(); -} +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +/* end file simdjson/generic/ondemand/serialization-inl.h for lsx */ +/* including simdjson/generic/ondemand/token_iterator-inl.h for lsx: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/token_iterator-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H -} // namespace ondemand -} // namespace lsx -} // namespace simdjson +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { +namespace lsx { +namespace ondemand { -simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::object &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline token_iterator::token_iterator( + const uint8_t *_buf, + token_position position +) noexcept : buf{_buf}, _position{position} +{ +} -simdjson_inline simdjson_result simdjson_result::begin() noexcept { - if (error()) { return error(); } - return first.begin(); +simdjson_inline uint32_t token_iterator::current_offset() const noexcept { + return *(_position); } -simdjson_inline simdjson_result simdjson_result::end() noexcept { - if (error()) { return error(); } - return first.end(); + + +simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { + return &buf[*(_position++)]; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); + +simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { + return &buf[*position]; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first).find_field_unordered(key); +simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { + return *position; } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { - if (error()) { return error(); } - return first[key]; +simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { + return *(position+1) - *position; } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first)[key]; + +simdjson_inline uint32_t token_iterator::peek_root_length(token_position position) const noexcept { + return *(position+2) - *(position) > *(position+1) - *(position) ? + *(position+1) - *(position) + : *(position+2) - *(position); } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); +simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { + return &buf[*(_position+delta)]; } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first).find_field(key); +simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { + return *(_position+delta); +} +simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { + return *(_position+delta+1) - *(_position+delta); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); +simdjson_inline token_position token_iterator::position() const noexcept { + return _position; +} +simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { + _position = target_position; } -simdjson_inline simdjson_result simdjson_result::at_path( - std::string_view json_path) noexcept { - if (error()) { - return error(); - } - return first.at_path(json_path); +simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { + return _position == other._position; +} +simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { + return _position != other._position; +} +simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { + return _position > other._position; +} +simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { + return _position >= other._position; } - -inline simdjson_result simdjson_result::reset() noexcept { - if (error()) { return error(); } - return first.reset(); +simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { + return _position < other._position; } - -inline simdjson_result simdjson_result::is_empty() noexcept { - if (error()) { return error(); } - return first.is_empty(); +simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { + return _position <= other._position; } -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { - if (error()) { return error(); } - return first.count_fields(); -} +} // namespace ondemand +} // namespace lsx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} -simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { - if (error()) { return error(); } - return first.raw_json(); -} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H -/* end file simdjson/generic/ondemand/object-inl.h for lsx */ -/* including simdjson/generic/ondemand/object_iterator-inl.h for lsx: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/object_iterator-inl.h for lsx */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/token_iterator-inl.h for lsx */ +/* including simdjson/generic/ondemand/value_iterator-inl.h for lsx: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/value_iterator-inl.h for lsx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace lsx { namespace ondemand { -// -// object_iterator -// - -simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept - : iter{_iter} -{} +simdjson_inline value_iterator::value_iterator( + json_iterator *json_iter, + depth_t depth, + token_position start_position +) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} +{ +} -simdjson_inline simdjson_result object_iterator::operator*() noexcept { - error_code error = iter.error(); - if (error) { iter.abandon(); return error; } - auto result = field::start(iter); - // TODO this is a safety rail ... users should exit loops as soon as they receive an error. - // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. - if (result.error()) { iter.abandon(); } - return result; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_object(); } -simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { - return !(*this != other); + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_root_object(); } -simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { - return iter.is_open(); + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { + assert_at_container_start(); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + if (*_json_iter->peek() == '}') { + logger::log_value(*_json_iter, "empty object"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY(end_container()); + return false; + } + return true; } -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_inline object_iterator &object_iterator::operator++() noexcept { - // TODO this is a safety rail ... users should exit loops as soon as they receive an error. - // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. - if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // object: e.g., `{"a":2} foo }`. Users concerned with garbage content should + // call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != '}') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); + } + // If the last character is } *and* the first gibberish character is also '}' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed object. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} - simdjson_unused error_code error; - if ((error = iter.skip_child() )) { return *this; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { + auto error = check_root_object(); + if(error) { return error; } + return started_object(); +} - simdjson_unused bool has_value; - if ((error = iter.has_next_field().get(has_value) )) { return *this; }; - return *this; +simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { +#if SIMDJSON_CHECK_EOF + if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } + // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + _json_iter->ascend_to(depth()-1); + return SUCCESS; } -SIMDJSON_POP_DISABLE_WARNINGS -// -// ### Live States -// -// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is -// always SUCCESS: -// -// - Start: This is the state when the object is first found and the iterator is just past the {. -// In this state, at_start == true. -// - Next: After we hand a scalar value to the user, or an array/object which they then fully -// iterate over, the iterator is at the , or } before the next value. In this state, -// depth == iter.depth, at_start == false, and error == SUCCESS. -// - Unfinished Business: When we hand an array/object to the user which they do not fully -// iterate over, we need to finish that iteration by skipping child values until we reach the -// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. -// -// ## Error States -// -// In error states, we will yield exactly one more value before stopping. iter.depth == depth -// and at_start is always false. We decrement after yielding the error, moving to the Finished -// state. -// -// - Chained Error: When the object iterator is part of an error chain--for example, in -// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an -// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and -// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. -// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, -// we flag that as an error and treat it exactly the same as a Chained Error. In this state, -// error == TAPE_ERROR, iter.depth == depth, and at_start == false. -// -// Errors that occur while reading a field to give to the user (such as when the key is not a -// string or the field is missing a colon) are yielded immediately. Depth is then decremented, -// moving to the Finished state without transitioning through an Error state at all. -// -// ## Terminal State -// -// The terminal state has iter.depth < depth. at_start is always false. -// -// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. -// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. -// +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { + assert_at_next(); -} // namespace ondemand -} // namespace lsx -} // namespace simdjson + // It's illegal to call this unless there are more tokens: anything that ends in } or ] is + // obligated to verify there are more tokens if they are not the top level. + switch (*_json_iter->return_current_and_advance()) { + case '}': + logger::log_end_value(*_json_iter, "object"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between object fields"); + } +} -namespace simdjson { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { + error_code error; + bool has_value; + // + // Initially, the object can be in one of a few different places: + // + // 1. The start of the object, at the first field: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + if (at_first_field()) { + has_value = true; -simdjson_inline simdjson_result::simdjson_result( - lsx::ondemand::object_iterator &&value -) noexcept - : implementation_simdjson_result_base(std::forward(value)) -{ - first.iter.assert_is_valid(); -} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base({}, error) -{ -} + // + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + return false; -simdjson_inline simdjson_result simdjson_result::operator*() noexcept { - if (error()) { return error(); } - return *first; -} -// If we're iterating and there is an error, return the error once. -simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return !error(); } - return first == other.first; -} -// If we're iterating and there is an error, return the error once. -simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return error(); } - return first != other.first; -} -// Checks for ']' and ',' -simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { - // Clear the error if there is one, so we don't yield it twice - if (error()) { second = SUCCESS; return *this; } - ++first; - return *this; -} + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + if ((error = skip_child() )) { abandon(); return error; } + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + while (has_value) { + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + //if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } -} // namespace simdjson + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); // Skip the value entirely + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } -#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/object_iterator-inl.h for lsx */ -/* including simdjson/generic/ondemand/parser-inl.h for lsx: #include "simdjson/generic/ondemand/parser-inl.h" */ -/* begin file simdjson/generic/ondemand/parser-inl.h for lsx */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H + // If the loop ended, we're out of fields to look at. + return false; +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { + /** + * When find_field_unordered_raw is called, we can either be pointing at the + * first key, pointing outside (at the closing brace) or if a key was matched + * we can be either pointing right afterthe ':' right before the value (that we need skip), + * or we may have consumed the value and we might be at a comma or at the + * final brace (ready for a call to has_next_field()). + */ + error_code error; + bool has_value; -namespace simdjson { -namespace lsx { -namespace ondemand { + // First, we scan from that point to the end. + // If we don't find a match, we may loop back around, and scan from the beginning to that point. + token_position search_start = _json_iter->position(); -simdjson_inline parser::parser(size_t max_capacity) noexcept - : _max_capacity{max_capacity} { -} + // We want to know whether we need to go back to the beginning. + bool at_first = at_first_field(); + /////////////// + // Initially, the object can be in one of a few different places: + // + // 1. At the first key: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + // + if (at_first) { + has_value = true; -simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { - if (new_capacity > max_capacity()) { return CAPACITY; } - if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { - // string_capacity copied from document::allocate - _capacity = 0; - size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); - string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); #if SIMDJSON_DEVELOPMENT_CHECKS - start_positions.reset(new (std::nothrow) token_position[new_max_depth]); + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } #endif - if (implementation) { - SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); - SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + SIMDJSON_TRY(reset_object().get(has_value)); + at_first = true; + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // } else { - SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); - } - _capacity = new_capacity; - _max_depth = new_max_depth; - return SUCCESS; -} + // If someone queried a key but they not did access the value, then we are left pointing + // at the ':' and we need to move forward through the value... If the value was + // processed then skip_child() does not move the iterator (but may adjust the depth). + if ((error = skip_child() )) { abandon(); return error; } + search_start = _json_iter->position(); + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } #if SIMDJSON_DEVELOPMENT_CHECKS -simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { - return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); -} + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } #endif + } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + // After initial processing, we will be in one of two states: + // + // ``` + // // At the beginning of a field + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // At the end of the object + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // ``` + // + // Next, we find a match starting from the current position. + while (has_value) { + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + // Performance note: it maybe wasteful to rewind to the beginning when there might be + // no other query following. Indeed, it would require reskipping the whole object. + // Instead, you can just stay where you are. If there is a new query, there is always time + // to rewind. + if(at_first) { return false; } + + // If we reach the end without finding a match, search the rest of the fields starting at the + // beginning of the object. + // (We have already run through the object before, so we've already validated its structure. We + // don't check errors in this bit.) + SIMDJSON_TRY(reset_object().get(has_value)); + while (true) { + SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + error = field_value(); SIMDJSON_ASSUME(!error); - json.remove_utf8_bom(); + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } - // Allocate if needed - if (capacity() < json.length() || !string_buf) { - SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // If we reached the end of the key-value pair we started from, then we know + // that the key is not there so we return false. We are either right before + // the next comma or the final brace. + if(_json_iter->position() == search_start) { return false; } + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); + // If we make the mistake of exiting here, then we could be left pointing at a key + // in the middle of an object. That's not an allowable state. } - - // Run stage 1. - SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); - return document::start({ reinterpret_cast(json.data()), this }); + // If the loop ended, we're out of fields to look at. The program should + // never reach this point. + return false; } +SIMDJSON_POP_DISABLE_WARNINGS -#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { + assert_at_next(); - json.remove_utf8_bom(); + const uint8_t *key = _json_iter->return_current_and_advance(); + if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } + return raw_json_string(key); +} - // Allocate if needed - if (capacity() < json.length() || !string_buf) { - SIMDJSON_TRY( allocate(json.length(), max_depth()) ); - } +simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { + assert_at_next(); - // Run stage 1. - const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); - if (err) { - if (err != UNCLOSED_STRING) - return err; - } - return document::start({ reinterpret_cast(json.data()), this, true }); + if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } + _json_iter->descend_to(depth()+1); + return SUCCESS; } -#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { - return iterate(padded_string_view(json, len, allocated)); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_array(); } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { - return iterate(padded_string_view(json, len, allocated)); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_root_array(); } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { - return iterate(padded_string_view(json, allocated)); +inline std::string value_iterator::to_string() const noexcept { + auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); + if(_json_iter != nullptr) { answer += _json_iter->to_string(); } + answer += std::string(" ]"); + return answer; } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { - if(json.capacity() - json.size() < SIMDJSON_PADDING) { - json.reserve(json.size() + SIMDJSON_PADDING); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { + assert_at_container_start(); + if (*_json_iter->peek() == ']') { + logger::log_value(*_json_iter, "empty array"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY( end_container() ); + return false; } - return iterate(padded_string_view(json)); -} - -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { - return iterate(padded_string_view(json)); + _json_iter->descend_to(depth()+1); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + return true; } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { - // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception - SIMDJSON_TRY( result.error() ); - padded_string_view json = result.value_unsafe(); - return iterate(json); +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should + // also call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != ']') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); + } + // If the last character is ] *and* the first gibberish character is also ']' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed array. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { - // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception - SIMDJSON_TRY( result.error() ); - const padded_string &json = result.value_unsafe(); - return iterate(json); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { + auto error = check_root_array(); + if (error) { return error; } + return started_array(); } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } - - json.remove_utf8_bom(); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { + assert_at_next(); - // Allocate if needed - if (capacity() < json.length()) { - SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + logger::log_event(*this, "has_next_element"); + switch (*_json_iter->return_current_and_advance()) { + case ']': + logger::log_end_value(*_json_iter, "array"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + _json_iter->descend_to(depth()+1); + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between array elements"); } +} - // Run stage 1. - SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); - return json_iterator(reinterpret_cast(json.data()), this); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { + auto not_true = atomparsing::str4ncmp(json, "true"); + auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); + bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 5 : 4]); + if (error) { return incorrect_type_error("Not a boolean"); } + return simdjson_result(!not_true); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { + bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); + // if we start with 'n', we must be a null + if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } + return is_null_string; } -inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { - if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } - if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { - buf += 3; - len -= 3; - } - if(allow_comma_separated && batch_size < len) { batch_size = len; } - return document_stream(*this, buf, len, batch_size, allow_comma_separated); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { + return get_raw_json_string().unescape(json_iter(), allow_replacement); } -inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_string(allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; } -inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { + return get_raw_json_string().unescape_wobbly(json_iter()); } -inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + advance_scalar("string"); + return raw_json_string(json+1); } - -simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { - return _capacity; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { + auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; } -simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept { - return _max_capacity; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { + auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; } -simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { - return _max_depth; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { + auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; } - -simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { - if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { - _max_capacity = max_capacity; - } else { - _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; - } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { + auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; } - -simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { - uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); - if (!end) { return STRING_ERROR; } - std::string_view result(reinterpret_cast(dst), end-dst); - dst = end; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { + auto result = numberparsing::parse_double(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } return result; } - -simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { - uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); - if (!end) { return STRING_ERROR; } - std::string_view result(reinterpret_cast(dst), end-dst); - dst = end; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { + auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } return result; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { + auto result = parse_bool(peek_non_root_scalar("bool")); + if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } + return result; +} +simdjson_inline simdjson_result value_iterator::is_null() noexcept { + bool is_null_value; + SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); + if(is_null_value) { advance_non_root_scalar("null"); } + return is_null_value; +} +simdjson_inline bool value_iterator::is_negative() noexcept { + return numberparsing::is_negative(peek_non_root_scalar("numbersign")); +} +simdjson_inline bool value_iterator::is_root_negative() noexcept { + return numberparsing::is_negative(peek_root_scalar("numbersign")); +} +simdjson_inline simdjson_result value_iterator::is_integer() noexcept { + return numberparsing::is_integer(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { + return numberparsing::get_number_type(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number() noexcept { + number num; + error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); + if(error) { return error; } + return num; +} -} // namespace ondemand -} // namespace lsx -} // namespace simdjson - -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::parser &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H -/* end file simdjson/generic/ondemand/parser-inl.h for lsx */ -/* including simdjson/generic/ondemand/raw_json_string-inl.h for lsx: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ -/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for lsx */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { - -namespace lsx { -namespace ondemand { - -simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} - -simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } - +simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("is_root_integer"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + return false; // if there are more than 20 characters, it cannot be represented as an integer. + } + auto answer = numberparsing::is_integer(tmpbuf); + // If the parsing was a success, we must still check that it is + // a single scalar. Note that we parse first because of cases like '[]' where + // getting TRAILING_CONTENT is wrong. + if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } + return answer; +} -simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { - size_t pos{0}; - while(pos < target.size()) { - pos = target.find('"', pos); - if(pos == std::string_view::npos) { return true; } - if(pos != 0 && target[pos-1] != '\\') { return false; } - if(pos > 1 && target[pos-2] == '\\') { - size_t backslash_count{2}; - for(size_t i = 3; i <= pos; i++) { - if(target[pos-i] == '\\') { backslash_count++; } - else { break; } - } - if(backslash_count % 2 == 0) { return false; } +simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return number_type::big_integer; } - pos++; + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; } - return true; + auto answer = numberparsing::get_number_type(tmpbuf); + if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + return answer; } - -simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { - size_t pos{0}; - while(target[pos]) { - const char * result = strchr(target+pos, '"'); - if(result == nullptr) { return true; } - pos = result - target; - if(pos != 0 && target[pos-1] != '\\') { return false; } - if(pos > 1 && target[pos-2] == '\\') { - size_t backslash_count{2}; - for(size_t i = 3; i <= pos; i++) { - if(target[pos-i] == '\\') { backslash_count++; } - else { break; } - } - if(backslash_count % 2 == 0) { return false; } +simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + // NOTE: the current approach doesn't work for very big integer numbers containing more than 1074 digits. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return BIGINT_ERROR; } - pos++; + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; } - return true; + number num; + error_code error = numberparsing::parse_number(tmpbuf, num); + if(error) { return error; } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("number"); + return num; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { + return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_root_string(check_trailing, allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { + return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } - -simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { - // If we are going to call memcmp, then we must know something about the length of the raw_json_string. - return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + auto result = numberparsing::parse_integer(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } -simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { - // Assumptions: does not contain unescaped quote characters("), and - // the raw content is quote terminated within a valid JSON string. - if(target.size() <= SIMDJSON_PADDING) { - return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + auto result = numberparsing::parse_integer_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; } - const char * r{raw()}; - size_t pos{0}; - for(;pos < target.size();pos++) { - if(r[pos] != target[pos]) { return false; } + auto result = numberparsing::parse_double(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); } - if(r[pos] != '"') { return false; } - return true; + return result; } -simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { - const char * r{raw()}; - size_t pos{0}; - bool escaping{false}; - for(;pos < target.size();pos++) { - if(r[pos] != target[pos]) { return false; } - // if target is a compile-time constant and it is free from - // quotes, then the next part could get optimized away through - // inlining. - if((target[pos] == '"') && !escaping) { - // We have reached the end of the raw_json_string but - // the target is not done. - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; - } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; } - if(r[pos] != '"') { return false; } - return true; + auto result = numberparsing::parse_double_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; } - -simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { - // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and - // the raw content is quote terminated within a valid JSON string. - const char * r{raw()}; - size_t pos{0}; - for(;target[pos];pos++) { - if(r[pos] != target[pos]) { return false; } - } - if(r[pos] != '"') { return false; } - return true; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("bool"); + // We have a boolean if we have either "true" or "false" and the next character is either + // a structural character or whitespace. We also check that the length is correct: + // "true" and "false" are 4 and 5 characters long, respectively. + bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && + (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + return value_true; } -simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { - // Assumptions: does not contain unescaped quote characters, and - // the raw content is quote terminated within a valid JSON string. - const char * r{raw()}; - size_t pos{0}; - bool escaping{false}; - for(;target[pos];pos++) { - if(r[pos] != target[pos]) { return false; } - // if target is a compile-time constant and it is free from - // quotes, then the next part could get optimized away through - // inlining. - if((target[pos] == '"') && !escaping) { - // We have reached the end of the raw_json_string but - // the target is not done. - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; - } +simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("null"); + bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + if(result) { // we have something that looks like a null. + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("null"); + } else if (json[0] == 'n') { + return incorrect_type_error("Not a null but starts with n"); } - if(r[pos] != '"') { return false; } - return true; + return result; } -simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { - return a.unsafe_is_equal(c); -} +simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); -simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { - return a == c; + return _json_iter->skip_child(depth()); } -simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { - return !(a == c); +simdjson_inline value_iterator value_iterator::child() const noexcept { + assert_at_child(); + return { _json_iter, depth()+1, _json_iter->token.position() }; } -simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { - return !(a == c); +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is +// marked non-inline. +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline bool value_iterator::is_open() const noexcept { + return _json_iter->depth() >= depth(); } +SIMDJSON_POP_DISABLE_WARNINGS - -simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { - return iter.unescape(*this, allow_replacement); +simdjson_inline bool value_iterator::at_end() const noexcept { + return _json_iter->at_end(); } -simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { - return iter.unescape_wobbly(*this); +simdjson_inline bool value_iterator::at_start() const noexcept { + return _json_iter->token.position() == start_position(); } -simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { - bool in_escape = false; - const char *s = str.raw(); - while (true) { - switch (*s) { - case '\\': in_escape = !in_escape; break; - case '"': if (in_escape) { in_escape = false; } else { return out; } break; - default: if (in_escape) { in_escape = false; } - } - out << *s; - s++; - } +simdjson_inline bool value_iterator::at_first_field() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + return _json_iter->token.position() == start_position() + 1; } -} // namespace ondemand -} // namespace lsx -} // namespace simdjson - -namespace simdjson { +simdjson_inline void value_iterator::abandon() noexcept { + _json_iter->abandon(); +} -simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::raw_json_string &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { + return _depth; +} +simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { + return _json_iter->error; +} +simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { + return _json_iter->string_buf_loc(); +} +simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { + return *_json_iter; +} +simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { + return *_json_iter; +} -simdjson_inline simdjson_result simdjson_result::raw() const noexcept { - if (error()) { return error(); } - return first.raw(); +simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { + return _json_iter->peek(start_position()); } -simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(lsx::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { - if (error()) { return error(); } - return first.unescape(iter, allow_replacement); +simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { + return _json_iter->peek_length(start_position()); } -simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(lsx::ondemand::json_iterator &iter) const noexcept { - if (error()) { return error(); } - return first.unescape_wobbly(iter); +simdjson_inline uint32_t value_iterator::peek_root_length() const noexcept { + return _json_iter->peek_root_length(start_position()); } -} // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H -/* end file simdjson/generic/ondemand/raw_json_string-inl.h for lsx */ -/* including simdjson/generic/ondemand/serialization-inl.h for lsx: #include "simdjson/generic/ondemand/serialization-inl.h" */ -/* begin file simdjson/generic/ondemand/serialization-inl.h for lsx */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return peek_start(); } -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + return _json_iter->peek(); +} -namespace simdjson { +simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return; } -inline std::string_view trim(const std::string_view str) noexcept { - // We can almost surely do better by rolling our own find_first_not_of function. - size_t first = str.find_first_not_of(" \t\n\r"); - // If we have the empty string (just white space), then no trimming is possible, and - // we return the empty string_view. - if (std::string_view::npos == first) { return std::string_view(); } - size_t last = str.find_last_not_of(" \t\n\r"); - return str.substr(first, (last - first + 1)); + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); } +simdjson_warn_unused simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { + logger::log_start_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + const uint8_t *json; + if (!is_at_start()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + json = peek_start(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + } else { + assert_at_start(); + /** + * We should be prudent. Let us peek. If it is not the right type, we + * return an error. Only once we have determined that we have the right + * type are we allowed to advance! + */ + json = _json_iter->peek(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + _json_iter->return_current_and_advance(); + } -inline simdjson_result to_json_string(lsx::ondemand::document& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); -} -inline simdjson_result to_json_string(lsx::ondemand::document_reference& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); + return SUCCESS; } -inline simdjson_result to_json_string(lsx::ondemand::value& x) noexcept { - /** - * If we somehow receive a value that has already been consumed, - * then the following code could be in trouble. E.g., we create - * an array as needed, but if an array was already created, then - * it could be bad. - */ - using namespace lsx::ondemand; - lsx::ondemand::json_type t; - auto error = x.type().get(t); - if(error != SUCCESS) { return error; } - switch (t) - { - case json_type::array: - { - lsx::ondemand::array array; - error = x.get_array().get(array); - if(error) { return error; } - return to_json_string(array); - } - case json_type::object: - { - lsx::ondemand::object object; - error = x.get_object().get(object); - if(error) { return error; } - return to_json_string(object); - } - default: - return trim(x.raw_json_token()); - } -} -inline simdjson_result to_json_string(lsx::ondemand::object& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); -} +simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } -inline simdjson_result to_json_string(lsx::ondemand::array& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); + assert_at_root(); + return _json_iter->peek(); } +simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); + assert_at_non_root_start(); + return _json_iter->peek(); } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); -} +simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); + assert_at_root(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); } +simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); + assert_at_non_root_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); +simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { + logger::log_error(*_json_iter, start_position(), depth(), message); + return INCORRECT_TYPE; } -} // namespace simdjson -namespace simdjson { namespace lsx { namespace ondemand { - -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::value x) { - std::string_view v; - auto error = simdjson::to_json_string(x).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::value x) { - std::string_view v; - auto error = simdjson::to_json_string(x).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } +simdjson_inline bool value_iterator::is_at_start() const noexcept { + return position() == start_position(); } -#endif -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::array value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::array value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } +simdjson_inline bool value_iterator::is_at_key() const noexcept { + // Keys are at the same depth as the object. + // Note here that we could be safer and check that we are within an object, + // but we do not. + return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; } -#endif -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::document& value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::document_reference& value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::document& value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } +simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { + // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). + auto delta = position() - start_position(); + return delta == 1 || delta == 2; } -#endif -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::object value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::lsx::ondemand::object value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } +inline void value_iterator::assert_at_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); } -#endif -}}} // namespace simdjson::lsx::ondemand - -#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H -/* end file simdjson/generic/ondemand/serialization-inl.h for lsx */ -/* including simdjson/generic/ondemand/token_iterator-inl.h for lsx: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/token_iterator-inl.h for lsx */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { -namespace lsx { -namespace ondemand { -simdjson_inline token_iterator::token_iterator( - const uint8_t *_buf, - token_position position -) noexcept : buf{_buf}, _position{position} -{ +inline void value_iterator::assert_at_container_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); } -simdjson_inline uint32_t token_iterator::current_offset() const noexcept { - return *(_position); +inline void value_iterator::assert_at_next() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); } - -simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { - return &buf[*(_position++)]; +simdjson_inline void value_iterator::move_at_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position); } -simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { - return &buf[*position]; -} -simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { - return *position; +simdjson_inline void value_iterator::move_at_container_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position + 1); } -simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { - return *(position+1) - *position; + +simdjson_inline simdjson_result value_iterator::reset_array() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_array(); } -simdjson_inline uint32_t token_iterator::peek_root_length(token_position position) const noexcept { - return *(position+2) - *(position) > *(position+1) - *(position) ? - *(position+1) - *(position) - : *(position+2) - *(position); +simdjson_inline simdjson_result value_iterator::reset_object() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_object(); } -simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { - return &buf[*(_position+delta)]; + +inline void value_iterator::assert_at_child() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); + SIMDJSON_ASSUME( _depth > 0 ); } -simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { - return *(_position+delta); + +inline void value_iterator::assert_at_root() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth == 1 ); } -simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { - return *(_position+delta+1) - *(_position+delta); + +inline void value_iterator::assert_at_non_root_start() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth > 1 ); } -simdjson_inline token_position token_iterator::position() const noexcept { - return _position; +inline void value_iterator::assert_is_valid() const noexcept { + SIMDJSON_ASSUME( _json_iter != nullptr ); } -simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { - _position = target_position; + +simdjson_inline bool value_iterator::is_valid() const noexcept { + return _json_iter != nullptr; } -simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { - return _position == other._position; +simdjson_inline simdjson_result value_iterator::type() const noexcept { + switch (*peek_start()) { + case '{': + return json_type::object; + case '[': + return json_type::array; + case '"': + return json_type::string; + case 'n': + return json_type::null; + case 't': case 'f': + return json_type::boolean; + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return json_type::number; + default: + return json_type::unknown; + } } -simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { - return _position != other._position; + +simdjson_inline token_position value_iterator::start_position() const noexcept { + return _start_position; } -simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { - return _position > other._position; + +simdjson_inline token_position value_iterator::position() const noexcept { + return _json_iter->position(); } -simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { - return _position >= other._position; + +simdjson_inline token_position value_iterator::end_position() const noexcept { + return _json_iter->end_position(); } -simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { - return _position < other._position; + +simdjson_inline token_position value_iterator::last_position() const noexcept { + return _json_iter->last_position(); } -simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { - return _position <= other._position; + +simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { + return _json_iter->report_error(error, message); } } // namespace ondemand @@ -110768,1116 +136217,2050 @@ simdjson_inline bool token_iterator::operator<=(const token_iterator &other) con namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::token_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::value_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/token_iterator-inl.h for lsx */ -/* including simdjson/generic/ondemand/value_iterator-inl.h for lsx: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/value_iterator-inl.h for lsx */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/value_iterator-inl.h for lsx */ + +// JSON builder inline definitions +/* including simdjson/generic/ondemand/json_string_builder-inl.h for lsx: #include "simdjson/generic/ondemand/json_string_builder-inl.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder-inl.h for lsx */ +/** + * This file is part of the builder API. It is temporarily in the ondemand + * directory but we will move it to a builder directory later. + */ +#include +#include +#include +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +/* + * Empirically, we have found that an inlined optimization is important for + * performance. The following macros are not ideal. We should find a better + * way to inline the code. + */ + +#if defined(__SSE2__) || defined(__x86_64__) || defined(__x86_64) || \ + (defined(_M_AMD64) || defined(_M_X64) || \ + (defined(_M_IX86_FP) && _M_IX86_FP == 2)) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#define SIMDJSON_EXPERIMENTAL_HAS_SSE2 1 +#endif +#endif + +#if defined(__aarch64__) || defined(_M_ARM64) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_NEON +#define SIMDJSON_EXPERIMENTAL_HAS_NEON 1 +#endif +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +#include +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#include +#endif + namespace simdjson { namespace lsx { -namespace ondemand { +namespace builder { + +static SIMDJSON_CONSTEXPR_LAMBDA std::array + json_quotable_character = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; -simdjson_inline value_iterator::value_iterator( - json_iterator *json_iter, - depth_t depth, - token_position start_position -) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} -{ -} +/** -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { - SIMDJSON_TRY( start_container('{', "Not an object", "object") ); - return started_object(); -} +A possible SWAR implementation of has_json_escapable_byte. It is not used +because it is slower than the current implementation. It is kept here for +reference (to show that we tried it). -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { - SIMDJSON_TRY( start_container('{', "Not an object", "object") ); - return started_root_object(); +inline bool has_json_escapable_byte(uint64_t x) { + uint64_t is_ascii = 0x8080808080808080ULL & ~x; + uint64_t xor2 = x ^ 0x0202020202020202ULL; + uint64_t lt32_or_eq34 = xor2 - 0x2121212121212121ULL; + uint64_t sub92 = x ^ 0x5C5C5C5C5C5C5C5CULL; + uint64_t eq92 = (sub92 - 0x0101010101010101ULL); + return ((lt32_or_eq34 | eq92) & is_ascii) != 0; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { - assert_at_container_start(); -#if SIMDJSON_DEVELOPMENT_CHECKS - _json_iter->set_start_position(_depth, start_position()); -#endif - if (*_json_iter->peek() == '}') { - logger::log_value(*_json_iter, "empty object"); - _json_iter->return_current_and_advance(); - end_container(); - return false; - } - return true; -} +**/ -simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { - // When in streaming mode, we cannot expect peek_last() to be the last structural element of the - // current document. It only works in the normal mode where we have indexed a single document. - // Note that adding a check for 'streaming' is not expensive since we only have at most - // one root element. - if ( ! _json_iter->streaming() ) { - // The following lines do not fully protect against garbage content within the - // object: e.g., `{"a":2} foo }`. Users concerned with garbage content should - // call `at_end()` on the document instance at the end of the processing to - // ensure that the processing has finished at the end. - // - if (*_json_iter->peek_last() != '}') { - _json_iter->abandon(); - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); - } - // If the last character is } *and* the first gibberish character is also '}' - // then on-demand could accidentally go over. So we need additional checks. - // https://github.com/simdjson/simdjson/issues/1834 - // Checking that the document is balanced requires a full scan which is potentially - // expensive, but it only happens in edge cases where the first padding character is - // a closing bracket. - if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { - _json_iter->abandon(); - // The exact error would require more work. It will typically be an unclosed object. - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); +SIMDJSON_CONSTEXPR_LAMBDA simdjson_inline bool +simple_needs_escaping(std::string_view v) { + for (char c : v) { + // a table lookup is faster than a series of comparisons + if (json_quotable_character[static_cast(c)]) { + return true; } } - return SUCCESS; + return false; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { - auto error = check_root_object(); - if(error) { return error; } - return started_object(); +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); + } + size_t i = 0; + uint8x16_t running = vdupq_n_u8(0); + uint8x16_t v34 = vdupq_n_u8(34); + uint8x16_t v92 = vdupq_n_u8(92); + + for (; i + 15 < view.size(); i += 16) { + uint8x16_t word = vld1q_u8((const uint8_t *)view.data() + i); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + if (i < view.size()) { + uint8x16_t word = + vld1q_u8((const uint8_t *)view.data() + view.length() - 16); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + return vmaxvq_u32(vreinterpretq_u32_u8(running)) != 0; +} +#elif SIMDJSON_EXPERIMENTAL_HAS_SSE2 +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); + } + size_t i = 0; + __m128i running = _mm_setzero_si128(); + for (; i + 15 < view.size(); i += 16) { + + __m128i word = + _mm_loadu_si128(reinterpret_cast(view.data() + i)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + if (i < view.size()) { + __m128i word = _mm_loadu_si128( + reinterpret_cast(view.data() + view.length() - 16)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + return _mm_movemask_epi8(running) != 0; +} +#else +simdjson_inline bool fast_needs_escaping(std::string_view view) { + return simple_needs_escaping(view); } +#endif -simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { -#if SIMDJSON_CHECK_EOF - if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } - // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } -#endif // SIMDJSON_CHECK_EOF - _json_iter->ascend_to(depth()-1); - return SUCCESS; +SIMDJSON_CONSTEXPR_LAMBDA inline size_t +find_next_json_quotable_character(const std::string_view view, + size_t location) noexcept { + + for (auto pos = view.begin() + location; pos != view.end(); ++pos) { + if (json_quotable_character[static_cast(*pos)]) { + return pos - view.begin(); + } + } + return size_t(view.size()); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { - assert_at_next(); +SIMDJSON_CONSTEXPR_LAMBDA static std::string_view control_chars[] = { + "\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", "\\u0006", + "\\u0007", "\\b", "\\t", "\\n", "\\u000b", "\\f", "\\r", + "\\u000e", "\\u000f", "\\u0010", "\\u0011", "\\u0012", "\\u0013", "\\u0014", + "\\u0015", "\\u0016", "\\u0017", "\\u0018", "\\u0019", "\\u001a", "\\u001b", + "\\u001c", "\\u001d", "\\u001e", "\\u001f"}; - // It's illegal to call this unless there are more tokens: anything that ends in } or ] is - // obligated to verify there are more tokens if they are not the top level. - switch (*_json_iter->return_current_and_advance()) { - case '}': - logger::log_end_value(*_json_iter, "object"); - SIMDJSON_TRY( end_container() ); - return false; - case ',': - return true; - default: - return report_error(TAPE_ERROR, "Missing comma between object fields"); +// All Unicode characters may be placed within the quotation marks, except for +// the characters that MUST be escaped: quotation mark, reverse solidus, and the +// control characters (U+0000 through U+001F). There are two-character sequence +// escape representations of some popular characters: +// \", \\, \b, \f, \n, \r, \t. +SIMDJSON_CONSTEXPR_LAMBDA void escape_json_char(char c, char *&out) { + if (c == '"') { + memcpy(out, "\\\"", 2); + out += 2; + } else if (c == '\\') { + memcpy(out, "\\\\", 2); + out += 2; + } else { + std::string_view v = control_chars[uint8_t(c)]; + memcpy(out, v.data(), v.size()); + out += v.size(); + } +} + +inline size_t write_string_escaped(const std::string_view input, char *out) { + size_t mysize = input.size(); + if (!fast_needs_escaping(input)) { // fast path! + memcpy(out, input.data(), input.size()); + return input.size(); + } + const char *const initout = out; + size_t location = find_next_json_quotable_character(input, 0); + memcpy(out, input.data(), location); + out += location; + escape_json_char(input[location], out); + location += 1; + while (location < mysize) { + size_t newlocation = find_next_json_quotable_character(input, location); + memcpy(out, input.data() + location, newlocation - location); + out += newlocation - location; + location = newlocation; + if (location == mysize) { + break; + } + escape_json_char(input[location], out); + location += 1; } + return out - initout; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { - error_code error; - bool has_value; - // - // Initially, the object can be in one of a few different places: - // - // 1. The start of the object, at the first field: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2, index 1) - // ``` - if (at_first_field()) { - has_value = true; +simdjson_inline string_builder::string_builder(size_t initial_capacity) + : buffer(new(std::nothrow) char[initial_capacity]), position(0), + capacity(buffer.get() != nullptr ? initial_capacity : 0), + is_valid(buffer.get() != nullptr) {} - // - // 2. When a previous search did not yield a value or the object is empty: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 0) - // { } - // ^ (depth 0, index 2) - // ``` - // - } else if (!is_open()) { -#if SIMDJSON_DEVELOPMENT_CHECKS - // If we're past the end of the object, we're being iterated out of order. - // Note: this is not perfect detection. It's possible the user is inside some other object; if so, - // this object iterator will blithely scan that object for fields. - if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } -#endif +simdjson_inline bool string_builder::capacity_check(size_t upcoming_bytes) { + // We use the convention that when is_valid is false, then the capacity and + // the position are 0. + // Most of the time, this function will return true. + if (simdjson_likely(upcoming_bytes <= capacity - position)) { + return true; + } + // check for overflow, most of the time there is no overflow + if (simdjson_likely(position + upcoming_bytes < position)) { return false; + } + // We will rarely get here. + grow_buffer((std::max)(capacity * 2, position + upcoming_bytes)); + // If the buffer allocation failed, we set is_valid to false. + return is_valid; +} - // 3. When a previous search found a field or an iterator yielded a value: - // - // ``` - // // When a field was not fully consumed (or not even touched at all) - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2) - // // When a field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // // When the last field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // ``` - // +simdjson_inline void string_builder::grow_buffer(size_t desired_capacity) { + if (!is_valid) { + return; + } + std::unique_ptr new_buffer(new (std::nothrow) char[desired_capacity]); + if (new_buffer.get() == nullptr) { + set_valid(false); + return; + } + std::memcpy(new_buffer.get(), buffer.get(), position); + buffer.swap(new_buffer); + capacity = desired_capacity; +} + +simdjson_inline void string_builder::set_valid(bool valid) noexcept { + if (!valid) { + is_valid = false; + capacity = 0; + position = 0; + buffer.reset(); } else { - if ((error = skip_child() )) { abandon(); return error; } - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } -#if SIMDJSON_DEVELOPMENT_CHECKS - if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } -#endif + is_valid = true; } - while (has_value) { - // Get the key and colon, stopping at the value. - raw_json_string actual_key; - // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes - // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. - // field_key() advances the pointer and checks that '"' is found (corresponding to a key). - // The depth is left unchanged by field_key(). - if ((error = field_key().get(actual_key) )) { abandon(); return error; }; - // field_value() will advance and check that we find a ':' separating the - // key and the value. It will also increment the depth by one. - if ((error = field_value() )) { abandon(); return error; } - // If it matches, stop and return - // We could do it this way if we wanted to allow arbitrary - // key content (including escaped quotes). - //if (actual_key.unsafe_is_equal(max_key_length, key)) { - // Instead we do the following which may trigger buffer overruns if the - // user provides an adversarial key (containing a well placed unescaped quote - // character and being longer than the number of bytes remaining in the JSON - // input). - if (actual_key.unsafe_is_equal(key)) { - logger::log_event(*this, "match", key, -2); - // If we return here, then we return while pointing at the ':' that we just checked. - return true; - } +} - // No match: skip the value and see if , or } is next - logger::log_event(*this, "no match", key, -2); - // The call to skip_child is meant to skip over the value corresponding to the key. - // After skip_child(), we are right before the next comma (',') or the final brace ('}'). - SIMDJSON_TRY( skip_child() ); // Skip the value entirely - // The has_next_field() advances the pointer and check that either ',' or '}' is found. - // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, - // then we are in error and we abort. - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +simdjson_inline size_t string_builder::size() const noexcept { + return position; +} + +simdjson_inline void string_builder::append(char c) noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = c; } +} - // If the loop ended, we're out of fields to look at. - return false; +simdjson_inline void string_builder::append_null() noexcept { + constexpr char null_literal[] = "null"; + constexpr size_t null_len = sizeof(null_literal) - 1; + if (capacity_check(null_len)) { + std::memcpy(buffer.get() + position, null_literal, null_len); + position += null_len; + } } -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { - /** - * When find_field_unordered_raw is called, we can either be pointing at the - * first key, pointing outside (at the closing brace) or if a key was matched - * we can be either pointing right afterthe ':' right before the value (that we need skip), - * or we may have consumed the value and we might be at a comma or at the - * final brace (ready for a call to has_next_field()). - */ - error_code error; - bool has_value; +simdjson_inline void string_builder::clear() noexcept { + position = 0; + // if it was invalid, we should try to repair it + if (!is_valid) { + capacity = 0; + buffer.reset(); + is_valid = true; + } +} - // First, we scan from that point to the end. - // If we don't find a match, we may loop back around, and scan from the beginning to that point. - token_position search_start = _json_iter->position(); +namespace internal { - // We want to know whether we need to go back to the beginning. - bool at_first = at_first_field(); - /////////////// - // Initially, the object can be in one of a few different places: - // - // 1. At the first key: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2, index 1) - // ``` - // - if (at_first) { - has_value = true; +template ::value>::type> +simdjson_really_inline int int_log2(number_type x) { + return 63 - leading_zeroes(uint64_t(x) | 1); +} + +simdjson_really_inline int fast_digit_count_32(uint32_t x) { + static uint64_t table[] = { + 4294967296, 8589934582, 8589934582, 8589934582, 12884901788, + 12884901788, 12884901788, 17179868184, 17179868184, 17179868184, + 21474826480, 21474826480, 21474826480, 21474826480, 25769703776, + 25769703776, 25769703776, 30063771072, 30063771072, 30063771072, + 34349738368, 34349738368, 34349738368, 34349738368, 38554705664, + 38554705664, 38554705664, 41949672960, 41949672960, 41949672960, + 42949672960, 42949672960}; + return uint32_t((x + table[int_log2(x)]) >> 32); +} + +simdjson_really_inline int fast_digit_count_64(uint64_t x) { + static uint64_t table[] = {9, + 99, + 999, + 9999, + 99999, + 999999, + 9999999, + 99999999, + 999999999, + 9999999999, + 99999999999, + 999999999999, + 9999999999999, + 99999999999999, + 999999999999999ULL, + 9999999999999999ULL, + 99999999999999999ULL, + 999999999999999999ULL, + 9999999999999999999ULL}; + int y = (19 * int_log2(x) >> 6); + y += x > table[y]; + return y + 1; +} + +template ::value>::type> +simdjson_really_inline size_t digit_count(number_type v) noexcept { + static_assert(sizeof(number_type) == 8 || sizeof(number_type) == 4 || + sizeof(number_type) == 2 || sizeof(number_type) == 1, + "We only support 8-bit, 16-bit, 32-bit and 64-bit numbers"); + SIMDJSON_IF_CONSTEXPR(sizeof(number_type) <= 4) { + return fast_digit_count_32(static_cast(v)); + } + else { + return fast_digit_count_64(static_cast(v)); + } +} +static const char decimal_table[200] = { + 0x30, 0x30, 0x30, 0x31, 0x30, 0x32, 0x30, 0x33, 0x30, 0x34, 0x30, 0x35, + 0x30, 0x36, 0x30, 0x37, 0x30, 0x38, 0x30, 0x39, 0x31, 0x30, 0x31, 0x31, + 0x31, 0x32, 0x31, 0x33, 0x31, 0x34, 0x31, 0x35, 0x31, 0x36, 0x31, 0x37, + 0x31, 0x38, 0x31, 0x39, 0x32, 0x30, 0x32, 0x31, 0x32, 0x32, 0x32, 0x33, + 0x32, 0x34, 0x32, 0x35, 0x32, 0x36, 0x32, 0x37, 0x32, 0x38, 0x32, 0x39, + 0x33, 0x30, 0x33, 0x31, 0x33, 0x32, 0x33, 0x33, 0x33, 0x34, 0x33, 0x35, + 0x33, 0x36, 0x33, 0x37, 0x33, 0x38, 0x33, 0x39, 0x34, 0x30, 0x34, 0x31, + 0x34, 0x32, 0x34, 0x33, 0x34, 0x34, 0x34, 0x35, 0x34, 0x36, 0x34, 0x37, + 0x34, 0x38, 0x34, 0x39, 0x35, 0x30, 0x35, 0x31, 0x35, 0x32, 0x35, 0x33, + 0x35, 0x34, 0x35, 0x35, 0x35, 0x36, 0x35, 0x37, 0x35, 0x38, 0x35, 0x39, + 0x36, 0x30, 0x36, 0x31, 0x36, 0x32, 0x36, 0x33, 0x36, 0x34, 0x36, 0x35, + 0x36, 0x36, 0x36, 0x37, 0x36, 0x38, 0x36, 0x39, 0x37, 0x30, 0x37, 0x31, + 0x37, 0x32, 0x37, 0x33, 0x37, 0x34, 0x37, 0x35, 0x37, 0x36, 0x37, 0x37, + 0x37, 0x38, 0x37, 0x39, 0x38, 0x30, 0x38, 0x31, 0x38, 0x32, 0x38, 0x33, + 0x38, 0x34, 0x38, 0x35, 0x38, 0x36, 0x38, 0x37, 0x38, 0x38, 0x38, 0x39, + 0x39, 0x30, 0x39, 0x31, 0x39, 0x32, 0x39, 0x33, 0x39, 0x34, 0x39, 0x35, + 0x39, 0x36, 0x39, 0x37, 0x39, 0x38, 0x39, 0x39, +}; +} // namespace internal - // 2. When a previous search did not yield a value or the object is empty: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 0) - // { } - // ^ (depth 0, index 2) - // ``` - // - } else if (!is_open()) { +template +simdjson_inline void string_builder::append(number_type v) noexcept { + static_assert(std::is_same::value || + std::is_integral::value || + std::is_floating_point::value, + "Unsupported number type"); + // If C++17 is available, we can 'if constexpr' here. + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + if (v) { + constexpr char true_literal[] = "true"; + constexpr size_t true_len = sizeof(true_literal) - 1; + if (capacity_check(true_len)) { + std::memcpy(buffer.get() + position, true_literal, true_len); + position += true_len; + } + } else { + constexpr char false_literal[] = "false"; + constexpr size_t false_len = sizeof(false_literal) - 1; + if (capacity_check(false_len)) { + std::memcpy(buffer.get() + position, false_literal, false_len); + position += false_len; + } + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_unsigned::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + unsigned_type pv = static_cast(v); + size_t dc = internal::digit_count(pv); + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100) * 2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_integral::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + bool negative = v < 0; + unsigned_type pv = static_cast(v); + if (negative) { + pv = 0 - pv; // the 0 is for Microsoft + } + size_t dc = internal::digit_count(pv); + // by always writing the minus sign, we avoid the branch. + buffer.get()[position] = '-'; + position += negative ? 1 : 0; + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100) * 2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_floating_point::value) { + constexpr size_t max_number_size = 24; + if (capacity_check(max_number_size)) { + // We could specialize for float. + char *end = simdjson::internal::to_chars(buffer.get() + position, nullptr, + double(v)); + position = end - buffer.get(); + } + } +} -#if SIMDJSON_DEVELOPMENT_CHECKS - // If we're past the end of the object, we're being iterated out of order. - // Note: this is not perfect detection. It's possible the user is inside some other object; if so, - // this object iterator will blithely scan that object for fields. - if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } -#endif - SIMDJSON_TRY(reset_object().get(has_value)); - at_first = true; - // 3. When a previous search found a field or an iterator yielded a value: - // - // ``` - // // When a field was not fully consumed (or not even touched at all) - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2) - // // When a field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // // When the last field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // ``` - // - } else { - // If someone queried a key but they not did access the value, then we are left pointing - // at the ':' and we need to move forward through the value... If the value was - // processed then skip_child() does not move the iterator (but may adjust the depth). - if ((error = skip_child() )) { abandon(); return error; } - search_start = _json_iter->position(); - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } -#if SIMDJSON_DEVELOPMENT_CHECKS - if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } -#endif +simdjson_inline void +string_builder::escape_and_append(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(6 * input.size())) { + position += write_string_escaped(input, buffer.get() + position); } +} - // After initial processing, we will be in one of two states: - // - // ``` - // // At the beginning of a field - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // // At the end of the object - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 0) - // ``` - // - // Next, we find a match starting from the current position. - while (has_value) { - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field +simdjson_inline void +string_builder::escape_and_append_with_quotes(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * input.size())) { + buffer.get()[position++] = '"'; + position += write_string_escaped(input, buffer.get() + position); + buffer.get()[position++] = '"'; + } +} - // Get the key and colon, stopping at the value. - raw_json_string actual_key; - // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes - // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. - // field_key() advances the pointer and checks that '"' is found (corresponding to a key). - // The depth is left unchanged by field_key(). - if ((error = field_key().get(actual_key) )) { abandon(); return error; }; - // field_value() will advance and check that we find a ':' separating the - // key and the value. It will also increment the depth by one. - if ((error = field_value() )) { abandon(); return error; } +simdjson_inline void +string_builder::escape_and_append_with_quotes(char input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * 1)) { + buffer.get()[position++] = '"'; + std::string_view cinput(&input, 1); + position += write_string_escaped(cinput, buffer.get() + position); + buffer.get()[position++] = '"'; + } +} - // If it matches, stop and return - // We could do it this way if we wanted to allow arbitrary - // key content (including escaped quotes). - // if (actual_key.unsafe_is_equal(max_key_length, key)) { - // Instead we do the following which may trigger buffer overruns if the - // user provides an adversarial key (containing a well placed unescaped quote - // character and being longer than the number of bytes remaining in the JSON - // input). - if (actual_key.unsafe_is_equal(key)) { - logger::log_event(*this, "match", key, -2); - // If we return here, then we return while pointing at the ':' that we just checked. - return true; - } +simdjson_inline void +string_builder::escape_and_append_with_quotes(const char *input) noexcept { + std::string_view cinput(input); + escape_and_append_with_quotes(cinput); +} +#if SIMDJSON_SUPPORTS_CONCEPTS +template +simdjson_inline void string_builder::escape_and_append_with_quotes() noexcept { + escape_and_append_with_quotes(constevalutil::string_constant::value); +} +#endif - // No match: skip the value and see if , or } is next - logger::log_event(*this, "no match", key, -2); - // The call to skip_child is meant to skip over the value corresponding to the key. - // After skip_child(), we are right before the next comma (',') or the final brace ('}'). - SIMDJSON_TRY( skip_child() ); - // The has_next_field() advances the pointer and check that either ',' or '}' is found. - // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, - // then we are in error and we abort. - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +simdjson_inline void string_builder::append_raw(const char *c) noexcept { + size_t len = std::strlen(c); + append_raw(c, len); +} + +simdjson_inline void +string_builder::append_raw(std::string_view input) noexcept { + if (capacity_check(input.size())) { + std::memcpy(buffer.get() + position, input.data(), input.size()); + position += input.size(); } - // Performance note: it maybe wasteful to rewind to the beginning when there might be - // no other query following. Indeed, it would require reskipping the whole object. - // Instead, you can just stay where you are. If there is a new query, there is always time - // to rewind. - if(at_first) { return false; } +} - // If we reach the end without finding a match, search the rest of the fields starting at the - // beginning of the object. - // (We have already run through the object before, so we've already validated its structure. We - // don't check errors in this bit.) - SIMDJSON_TRY(reset_object().get(has_value)); - while (true) { - SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field +simdjson_inline void string_builder::append_raw(const char *str, + size_t len) noexcept { + if (capacity_check(len)) { + std::memcpy(buffer.get() + position, str, len); + position += len; + } +} +#if SIMDJSON_SUPPORTS_CONCEPTS +// Support for optional types (std::optional, etc.) +template + requires(!require_custom_serialization) +simdjson_inline void string_builder::append(const T &opt) { + if (opt) { + append(*opt); + } else { + append_null(); + } +} + +template + requires(require_custom_serialization) +simdjson_inline void string_builder::append(T &&val) { + serialize(*this, std::forward(val)); +} - // Get the key and colon, stopping at the value. - raw_json_string actual_key; - // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes - // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. - // field_key() advances the pointer and checks that '"' is found (corresponding to a key). - // The depth is left unchanged by field_key(). - error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); - // field_value() will advance and check that we find a ':' separating the - // key and the value. It will also increment the depth by one. - error = field_value(); SIMDJSON_ASSUME(!error); +template + requires(std::is_convertible::value || + std::is_same::value) +simdjson_inline void string_builder::append(const T &value) { + escape_and_append_with_quotes(value); +} +#endif - // If it matches, stop and return - // We could do it this way if we wanted to allow arbitrary - // key content (including escaped quotes). - // if (actual_key.unsafe_is_equal(max_key_length, key)) { - // Instead we do the following which may trigger buffer overruns if the - // user provides an adversarial key (containing a well placed unescaped quote - // character and being longer than the number of bytes remaining in the JSON - // input). - if (actual_key.unsafe_is_equal(key)) { - logger::log_event(*this, "match", key, -2); - // If we return here, then we return while pointing at the ':' that we just checked. - return true; +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS +// Support for range-based appending (std::ranges::view, etc.) +template + requires(!std::is_convertible::value && !require_custom_serialization) +simdjson_inline void string_builder::append(const R &range) noexcept { + auto it = std::ranges::begin(range); + auto end = std::ranges::end(range); + if constexpr (concepts::is_pair>) { + start_object(); + + if (it == end) { + end_object(); + return; // Handle empty range } + // Append first item without leading comma + append_key_value(it->first, it->second); + ++it; - // No match: skip the value and see if , or } is next - logger::log_event(*this, "no match", key, -2); - // The call to skip_child is meant to skip over the value corresponding to the key. - // After skip_child(), we are right before the next comma (',') or the final brace ('}'). - SIMDJSON_TRY( skip_child() ); - // If we reached the end of the key-value pair we started from, then we know - // that the key is not there so we return false. We are either right before - // the next comma or the final brace. - if(_json_iter->position() == search_start) { return false; } - // The has_next_field() advances the pointer and check that either ',' or '}' is found. - // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, - // then we are in error and we abort. - error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); - // If we make the mistake of exiting here, then we could be left pointing at a key - // in the middle of an object. That's not an allowable state. + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append_key_value(it->first, it->second); + } + end_object(); + } else { + start_array(); + if (it == end) { + end_array(); + return; // Handle empty range + } + + // Append first item without leading comma + append(*it); + ++it; + + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append(*it); + } + end_array(); } - // If the loop ended, we're out of fields to look at. The program should - // never reach this point. - return false; } -SIMDJSON_POP_DISABLE_WARNINGS -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { - assert_at_next(); +#endif - const uint8_t *key = _json_iter->return_current_and_advance(); - if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } - return raw_json_string(key); +#if SIMDJSON_EXCEPTIONS +simdjson_inline string_builder::operator std::string() const noexcept(false) { + return std::string(operator std::string_view()); } -simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { - assert_at_next(); +simdjson_inline string_builder::operator std::string_view() const + noexcept(false) simdjson_lifetime_bound { + return view(); +} +#endif - if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } - _json_iter->descend_to(depth()+1); - return SUCCESS; +simdjson_inline simdjson_result +string_builder::view() const noexcept { + if (!is_valid) { + return simdjson::OUT_OF_CAPACITY; + } + return std::string_view(buffer.get(), position); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { - SIMDJSON_TRY( start_container('[', "Not an array", "array") ); - return started_array(); +simdjson_inline simdjson_result string_builder::c_str() noexcept { + if (capacity_check(1)) { + buffer.get()[position] = '\0'; + return buffer.get(); + } + return simdjson::OUT_OF_CAPACITY; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { - SIMDJSON_TRY( start_container('[', "Not an array", "array") ); - return started_root_array(); +simdjson_inline bool string_builder::validate_unicode() const noexcept { + return simdjson::validate_utf8(buffer.get(), position); } -inline std::string value_iterator::to_string() const noexcept { - auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); - if(_json_iter != nullptr) { answer += _json_iter->to_string(); } - answer += std::string(" ]"); - return answer; +simdjson_inline void string_builder::start_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '{'; + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { - assert_at_container_start(); - if (*_json_iter->peek() == ']') { - logger::log_value(*_json_iter, "empty array"); - _json_iter->return_current_and_advance(); - SIMDJSON_TRY( end_container() ); - return false; +simdjson_inline void string_builder::end_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '}'; } - _json_iter->descend_to(depth()+1); -#if SIMDJSON_DEVELOPMENT_CHECKS - _json_iter->set_start_position(_depth, start_position()); -#endif - return true; } -simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { - // When in streaming mode, we cannot expect peek_last() to be the last structural element of the - // current document. It only works in the normal mode where we have indexed a single document. - // Note that adding a check for 'streaming' is not expensive since we only have at most - // one root element. - if ( ! _json_iter->streaming() ) { - // The following lines do not fully protect against garbage content within the - // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should - // also call `at_end()` on the document instance at the end of the processing to - // ensure that the processing has finished at the end. - // - if (*_json_iter->peek_last() != ']') { - _json_iter->abandon(); - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); - } - // If the last character is ] *and* the first gibberish character is also ']' - // then on-demand could accidentally go over. So we need additional checks. - // https://github.com/simdjson/simdjson/issues/1834 - // Checking that the document is balanced requires a full scan which is potentially - // expensive, but it only happens in edge cases where the first padding character is - // a closing bracket. - if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { - _json_iter->abandon(); - // The exact error would require more work. It will typically be an unclosed array. - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); - } +simdjson_inline void string_builder::start_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '['; } - return SUCCESS; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { - auto error = check_root_array(); - if (error) { return error; } - return started_array(); +simdjson_inline void string_builder::end_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ']'; + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { - assert_at_next(); +simdjson_inline void string_builder::append_comma() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ','; + } +} - logger::log_event(*this, "has_next_element"); - switch (*_json_iter->return_current_and_advance()) { - case ']': - logger::log_end_value(*_json_iter, "array"); - SIMDJSON_TRY( end_container() ); - return false; - case ',': - _json_iter->descend_to(depth()+1); - return true; - default: - return report_error(TAPE_ERROR, "Missing comma between array elements"); +simdjson_inline void string_builder::append_colon() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ':'; } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { - auto not_true = atomparsing::str4ncmp(json, "true"); - auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); - bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 5 : 4]); - if (error) { return incorrect_type_error("Not a boolean"); } - return simdjson_result(!not_true); +template +simdjson_inline void +string_builder::append_key_value(key_type key, value_type value) noexcept { + static_assert(std::is_same::value || + std::is_convertible::value, + "Unsupported key type"); + escape_and_append_with_quotes(key); + append_colon(); + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + append_null(); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR( + std::is_convertible::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else { + append(value); + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { - bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); - // if we start with 'n', we must be a null - if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } - return is_null_string; + +#if SIMDJSON_SUPPORTS_CONCEPTS +template +simdjson_inline void +string_builder::append_key_value(value_type value) noexcept { + escape_and_append_with_quotes(); + append_colon(); + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + append_null(); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR( + std::is_convertible::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else { + append(value); + } } +#endif -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { - return get_raw_json_string().unescape(json_iter(), allow_replacement); +} // namespace builder +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_STRING_BUILDER_INL_H +/* end file simdjson/generic/ondemand/json_string_builder-inl.h for lsx */ +/* including simdjson/generic/ondemand/json_builder.h for lsx: #include "simdjson/generic/ondemand/json_builder.h" */ +/* begin file simdjson/generic/ondemand/json_builder.h for lsx */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/concepts.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#if SIMDJSON_STATIC_REFLECTION + +#include +#include +#include +#include +#include +#include +#include +#include +// #include // for std::define_static_string - header not available yet + +namespace simdjson { +namespace lsx { +namespace builder { + +template + requires(concepts::container_but_not_string && !require_custom_serialization) +constexpr void atom(string_builder &b, const T &t) { + auto it = t.begin(); + auto end = t.end(); + if (it == end) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, *it); + ++it; + for (; it != end; ++it) { + b.append(','); + atom(b, *it); + } + b.append(']'); } -template -simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept { - std::string_view content; - auto err = get_string(allow_replacement).get(content); - if (err) { return err; } - receiver = content; - return SUCCESS; + +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +constexpr void atom(string_builder &b, const T &t) { + b.escape_and_append_with_quotes(t); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { - return get_raw_json_string().unescape_wobbly(json_iter()); + +template + requires(!require_custom_serialization) +constexpr void atom(string_builder &b, const T &m) { + if (m.empty()) { + b.append_raw("{}"); + return; + } + b.append('{'); + bool first = true; + for (const auto& [key, value] : m) { + if (!first) { + b.append(','); + } + first = false; + // Keys must be convertible to string_view per the concept + b.escape_and_append_with_quotes(key); + b.append(':'); + atom(b, value); + } + b.append('}'); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { - auto json = peek_scalar("string"); - if (*json != '"') { return incorrect_type_error("Not a string"); } - advance_scalar("string"); - return raw_json_string(json+1); + + +template::value && !std::is_same_v>::type> +constexpr void atom(string_builder &b, const number_type t) { + b.append(t); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { - auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } - return result; + +template + requires(std::is_class_v && !concepts::container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && !require_custom_serialization) +constexpr void atom(string_builder &b, const T &t) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, t.[:dm:]); + i++; + }; + b.append('}'); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { - auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } - return result; + +// Support for optional types (std::optional, etc.) +template + requires(!require_custom_serialization) +constexpr void atom(string_builder &b, const T &opt) { + if (opt) { + atom(b, opt.value()); + } else { + b.append_raw("null"); + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { - auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } - return result; + +// Support for smart pointers (std::unique_ptr, std::shared_ptr, etc.) +template + requires(!require_custom_serialization) +constexpr void atom(string_builder &b, const T &ptr) { + if (ptr) { + atom(b, *ptr); + } else { + b.append_raw("null"); + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { - auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } - return result; + +// Support for enums - serialize as string representation using expand approach from P2996R12 +template + requires(std::is_enum_v && !require_custom_serialization) +void atom(string_builder &b, const T &e) { +#if SIMDJSON_STATIC_REFLECTION + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + constexpr auto enum_str = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(enum_val))); + if (e == [:enum_val:]) { + b.append_raw(enum_str); + return; + } + }; + // Fallback to integer if enum value not found + atom(b, static_cast>(e)); +#else + // Fallback: serialize as integer if reflection not available + atom(b, static_cast>(e)); +#endif } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { - auto result = numberparsing::parse_double(peek_non_root_scalar("double")); - if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } - return result; + +// Support for appendable containers that don't have operator[] (sets, etc.) +template + requires(!concepts::container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v && !require_custom_serialization) +constexpr void atom(string_builder &b, const T &container) { + if (container.empty()) { + b.append_raw("[]"); + return; + } + b.append('['); + bool first = true; + for (const auto& item : container) { + if (!first) { + b.append(','); + } + first = false; + atom(b, item); + } + b.append(']'); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { - auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); - if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } - return result; + +// append functions that delegate to atom functions for primitive types +template + requires(std::is_arithmetic_v && !std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { - auto result = parse_bool(peek_non_root_scalar("bool")); - if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } - return result; + +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_inline simdjson_result value_iterator::is_null() noexcept { - bool is_null_value; - SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); - if(is_null_value) { advance_non_root_scalar("null"); } - return is_null_value; + +template + requires(!require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_inline bool value_iterator::is_negative() noexcept { - return numberparsing::is_negative(peek_non_root_scalar("numbersign")); + +template + requires(!require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_inline bool value_iterator::is_root_negative() noexcept { - return numberparsing::is_negative(peek_root_scalar("numbersign")); + +template + requires(!concepts::container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v && !require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_inline simdjson_result value_iterator::is_integer() noexcept { - return numberparsing::is_integer(peek_non_root_scalar("integer")); + +template + requires(!require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +// works for struct +template + requires(std::is_class_v && !concepts::container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && !require_custom_serialization) +void append(string_builder &b, const Z &z) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^Z, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, z.[:dm:]); + i++; + }; + b.append('}'); } -simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { - return numberparsing::get_number_type(peek_non_root_scalar("integer")); + +// works for container that have begin() and end() iterators +template + requires(concepts::container_but_not_string && !require_custom_serialization) +void append(string_builder &b, const Z &z) { + auto it = z.begin(); + auto end = z.end(); + if (it == end) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, *it); + ++it; + for (; it != end; ++it) { + b.append(','); + atom(b, *it); + } + b.append(']'); } -simdjson_inline simdjson_result value_iterator::get_number() noexcept { - number num; - error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); - if(error) { return error; } - return num; + +template + requires (require_custom_serialization) +void append(string_builder &b, const Z &z) { + b.append(z); } -simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("is_root_integer"); - uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - return false; // if there are more than 20 characters, it cannot be represented as an integer. - } - auto answer = numberparsing::is_integer(tmpbuf); - // If the parsing was a success, we must still check that it is - // a single scalar. Note that we parse first because of cases like '[]' where - // getting TRAILING_CONTENT is wrong. - if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } - return answer; + +template +simdjson_warn_unused simdjson_result to_json_string(const Z &z, size_t initial_capacity = string_builder::DEFAULT_INITIAL_CAPACITY) { + string_builder b(initial_capacity); + append(b, z); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); } -simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("number"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - uint8_t tmpbuf[1074+8+1+1]; - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - if(numberparsing::check_if_integer(json, max_len)) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); - return number_type::big_integer; - } - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); - return NUMBER_ERROR; - } - auto answer = numberparsing::get_number_type(tmpbuf); - if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - return answer; +template +simdjson_warn_unused simdjson_error to_json(const Z &z, std::string &s, size_t initial_capacity = string_builder::DEFAULT_INITIAL_CAPACITY) { + string_builder b(initial_capacity); + append(b, z); + std::string_view view; + if(auto e = b.view().get(view); e) { return e; } + s.assign(view); + return SUCCESS; } -simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("number"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - // NOTE: the current approach doesn't work for very big integer numbers containing more than 1074 digits. - uint8_t tmpbuf[1074+8+1+1]; - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - if(numberparsing::check_if_integer(json, max_len)) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); - return BIGINT_ERROR; + +template +string_builder& operator<<(string_builder& b, const Z& z) { + append(b, z); + return b; +} + +// extract_from: Serialize only specific fields from a struct to JSON +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +void extract_from(string_builder &b, const T &obj) { + // Helper to check if a field name matches any of the requested fields + auto should_extract = [](std::string_view field_name) constexpr -> bool { + return ((FieldNames.view() == field_name) || ...); + }; + + b.append('{'); + bool first = true; + + // Iterate through all members of T using reflection + template for (constexpr auto mem : std::define_static_array( + std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + + if constexpr (std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + + // Only serialize this field if it's in our list of requested fields + if constexpr (should_extract(key)) { + if (!first) { + b.append(','); + } + first = false; + + // Serialize the key + constexpr auto quoted_key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(mem))); + b.append_raw(quoted_key); + b.append(':'); + + // Serialize the value + atom(b, obj.[:mem:]); + } } - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); - return NUMBER_ERROR; - } - number num; - error_code error = numberparsing::parse_number(tmpbuf, num); - if(error) { return error; } - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("number"); - return num; + }; + + b.append('}'); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { - return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); + +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_result extract_from(const T &obj, size_t initial_capacity = string_builder::DEFAULT_INITIAL_CAPACITY) { + string_builder b(initial_capacity); + extract_from(b, obj); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); } -template -simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept { - std::string_view content; - auto err = get_root_string(check_trailing, allow_replacement).get(content); - if (err) { return err; } - receiver = content; + +} // namespace builder +} // namespace lsx +// Alias the function template to 'to' in the global namespace +template +simdjson_warn_unused simdjson_result to_json(const Z &z, size_t initial_capacity = lsx::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + lsx::builder::string_builder b(initial_capacity); + lsx::builder::append(b, z); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); +} +template +simdjson_warn_unused simdjson_error to_json(const Z &z, std::string &s, size_t initial_capacity = lsx::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + lsx::builder::string_builder b(initial_capacity); + lsx::builder::append(b, z); + std::string_view view; + if(auto e = b.view().get(view); e) { return e; } + s.assign(view); return SUCCESS; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { - return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); +// Global namespace function for extract_from +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_result extract_from(const T &obj, size_t initial_capacity = lsx::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + lsx::builder::string_builder b(initial_capacity); + lsx::builder::extract_from(b, obj); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { - auto json = peek_scalar("string"); - if (*json != '"') { return incorrect_type_error("Not a string"); } - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_scalar("string"); - return raw_json_string(json+1); + +} // namespace simdjson + +#endif // SIMDJSON_STATIC_REFLECTION + +#endif +/* end file simdjson/generic/ondemand/json_builder.h for lsx */ + +// JSON path accessor (compile-time) - must be after inline definitions +/* including simdjson/generic/ondemand/compile_time_accessors.h for lsx: #include "simdjson/generic/ondemand/compile_time_accessors.h" */ +/* begin file simdjson/generic/ondemand/compile_time_accessors.h for lsx */ +/** + * Compile-time JSON Path and JSON Pointer accessors using C++26 reflection (P2996) + * + * This file validates JSON paths/pointers against struct definitions at compile time + * and generates optimized accessor code with zero runtime overhead. + * + * ## How It Works + * + * **Compile Time**: Path is parsed, validated against struct, types are checked + * **Runtime**: Direct navigation with no parsing or validation overhead + * + * Example: + * ```cpp + * struct User { std::string name; std::vector emails; }; + * + * std::string email; + * path_accessor::extract_field(doc, email); + * + * // Compile time validates: + * // 1. User has "emails" field + * // 2. "emails" is array-like + * // 3. Element type is std::string + * // 4. static_assert(^^std::string == ^^std::string) + * + * // Runtime just navigates: + * // doc.get_object().find_field("emails").get_array().at(0).get(email) + * ``` + * + * ## Key Reflection APIs + * + * - `^^Type`: Reflect operator, converts type to std::meta::info + * - `std::meta::nonstatic_data_members_of(type)`: Get all fields of a struct + * - `std::meta::identifier_of(member)`: Get field name as string_view + * - `std::meta::type_of(member)`: Get reflected type of a field + * - `std::meta::is_array_type(type)`: Check if C-style array + * - `std::meta::remove_extent(array)`: Extract element type from array + * - `std::meta::members_of(type)`: Get all members including typedefs + * - `std::meta::is_type(member)`: Check if member is a type (vs field) + * + * All operations execute at compile time in consteval contexts. + */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_COMPILE_TIME_ACCESSORS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_COMPILE_TIME_ACCESSORS_H */ +/* amalgamation skipped (editor-only): */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// Arguably, we should just check SIMDJSON_STATIC_REFLECTION since it +// is unlikely that we will have reflection support without concepts support. +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + +#include +#include +#include + +namespace simdjson { +namespace lsx { +namespace ondemand { +/*** + * JSONPath implementation for compile-time access + * RFC 9535 JSONPath: Query Expressions for JSON, https://www.rfc-editor.org/rfc/rfc9535 + */ +namespace json_path { + +// Note: value type must be fully defined before this header is included +// This is ensured by including this in amalgamated.h after value-inl.h + +using ::simdjson::lsx::ondemand::value; + +// Path step types +enum class step_type { + field, // .field_name or ["field_name"] + array_index // [index] +}; + +// Represents a single step in a JSON path +template +struct path_step { + step_type type; + char key[N]; // Field name (empty for array indices) + std::size_t index; // Array index (0 for field access) + + constexpr path_step(step_type t, const char (&k)[N], std::size_t idx = 0) + : type(t), index(idx) { + for (std::size_t i = 0; i < N; ++i) { + key[i] = k[i]; + } + } + + constexpr std::string_view key_view() const { + return {key, N - 1}; + } +}; + +// Helper to create field step +template +consteval auto make_field_step(const char (&name)[N]) { + return path_step(step_type::field, name, 0); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("uint64"); - uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; + +// Helper to create array index step +consteval auto make_index_step(std::size_t idx) { + return path_step<1>(step_type::array_index, "", idx); +} + +// Parse state for compile-time JSON path parsing +struct parse_result { + bool success; + std::size_t pos; + std::string_view error_msg; +}; + +// Compile-time JSON path parser +// Supports subset: .field, ["field"], [index], nested combinations +template +struct json_path_parser { + static constexpr std::string_view path_str = Path.view(); + + // Skip leading $ if present + static consteval std::size_t skip_root() { + if (!path_str.empty() && path_str[0] == '$') { + return 1; + } + return 0; } - auto result = numberparsing::parse_unsigned(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("uint64"); + + // Count the number of steps in the path at compile time + static consteval std::size_t count_steps() { + std::size_t count = 0; + std::size_t i = skip_root(); + + while (i < path_str.size()) { + if (path_str[i] == '.') { + // Field access: .field + ++i; + if (i >= path_str.size()) break; + + // Skip field name + while (i < path_str.size() && path_str[i] != '.' && path_str[i] != '[') { + ++i; + } + ++count; + } else if (path_str[i] == '[') { + // Array or bracket notation + ++i; + if (i >= path_str.size()) break; + + if (path_str[i] == '"' || path_str[i] == '\'') { + // Field access: ["field"] or ['field'] + char quote = path_str[i]; + ++i; + while (i < path_str.size() && path_str[i] != quote) { + ++i; + } + if (i < path_str.size()) ++i; // skip closing quote + if (i < path_str.size() && path_str[i] == ']') ++i; + } else { + // Array index: [0], [123] + while (i < path_str.size() && path_str[i] != ']') { + ++i; + } + if (i < path_str.size()) ++i; // skip ] + } + ++count; + } else { + ++i; + } + } + + return count; + } + + // Parse a field name at compile time + static consteval std::size_t parse_field_name(std::size_t start, char* out, std::size_t max_len) { + std::size_t len = 0; + std::size_t i = start; + + while (i < path_str.size() && path_str[i] != '.' && path_str[i] != '[' && len < max_len - 1) { + out[len++] = path_str[i++]; + } + out[len] = '\0'; + return i; + } + + // Parse an array index at compile time + static consteval std::pair parse_array_index(std::size_t start) { + std::size_t index = 0; + std::size_t i = start; + + while (i < path_str.size() && path_str[i] >= '0' && path_str[i] <= '9') { + index = index * 10 + (path_str[i] - '0'); + ++i; + } + + return {i, index}; + } +}; + +// Compile-time path accessor generator +template +struct path_accessor { + using value = ::simdjson::lsx::ondemand::value; + + static constexpr auto parser = json_path_parser(); + static constexpr std::size_t num_steps = parser.count_steps(); + static constexpr std::string_view path_view = Path.view(); + + // Compile-time accessor generation + // If T is a struct, validates the path at compile time + // If T is void, skips validation + template + static inline simdjson_result access(DocOrValue& doc_or_val) noexcept { + // Validate path at compile time if T is a struct + if constexpr (std::is_class_v) { + constexpr bool path_valid = validate_path(); + static_assert(path_valid, "JSON path does not match struct definition"); + } + + // Parse the path at compile time to build access steps + return access_impl(doc_or_val.get_value()); + } + + // Extract value at path directly into target with compile-time type validation + // Example: std::string name; path_accessor::extract_field(doc, name); + template + static inline error_code extract_field(DocOrValue& doc_or_val, FieldType& target) noexcept { + static_assert(std::is_class_v, "extract_field requires T to be a struct type for validation"); + + // Validate path exists in struct definition + constexpr bool path_valid = validate_path(); + static_assert(path_valid, "JSON path does not match struct definition"); + + // Get the type at the end of the path + constexpr auto final_type = get_final_type(); + + // Verify target type matches the field type + static_assert(final_type == ^^FieldType, "Target type does not match the field type at the path"); + + // All validation done at compile time - just navigate and extract + auto json_value = access_impl(doc_or_val.get_value()); + if (json_value.error()) return json_value.error(); + + return json_value.get(target); + } + +private: + // Get the final type by walking the path through the struct type + template + static consteval std::enable_if_t, std::meta::info> get_final_type() { + auto current_type = ^^T; + std::size_t i = parser.skip_root(); + + while (i < path_view.size()) { + if (path_view[i] == '.') { + // .field syntax + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != '.' && path_view[i] != '[') { + ++i; + } + + std::string_view field_name = path_view.substr(field_start, i - field_start); + + auto members = std::meta::nonstatic_data_members_of( + current_type, std::meta::access_context::unchecked() + ); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + break; + } + } + + } else if (path_view[i] == '[') { + ++i; + if (i >= path_view.size()) break; + + if (path_view[i] == '"' || path_view[i] == '\'') { + // ["field"] syntax + char quote = path_view[i]; + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != quote) { + ++i; + } + + std::string_view field_name = path_view.substr(field_start, i - field_start); + if (i < path_view.size()) ++i; // skip quote + if (i < path_view.size() && path_view[i] == ']') ++i; + + auto members = std::meta::nonstatic_data_members_of( + current_type, std::meta::access_context::unchecked() + ); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + break; + } + } + + } else { + // [index] syntax - extract element type + while (i < path_view.size() && path_view[i] >= '0' && path_view[i] <= '9') { + ++i; + } + if (i < path_view.size() && path_view[i] == ']') ++i; + + current_type = get_element_type_reflected(current_type); + } + } else { + ++i; + } + } + + return current_type; } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("uint64"); - uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; + +private: + // Walk path and extract directly into final field using compile-time reflection + template + static inline error_code extract_with_reflection(simdjson_result current, TargetType& target_ref) noexcept { + if (current.error()) return current.error(); + + // Base case: end of path - extract into target + if constexpr (PathPos >= path_view.size()) { + return current.get(target_ref); + } + // Field access: .field_name + else if constexpr (path_view[PathPos] == '.') { + constexpr auto field_info = parse_next_field(PathPos); + constexpr std::string_view field_name = std::get<0>(field_info); + constexpr std::size_t next_pos = std::get<1>(field_info); + + constexpr auto member_info = find_member_by_name(CurrentType, field_name); + static_assert(member_info != ^^void, "Field not found in struct"); + + constexpr auto member_type = std::meta::type_of(member_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + auto obj = obj_result.value_unsafe(); + auto field_value = obj.find_field_unordered(field_name); + + if constexpr (next_pos >= path_view.size()) { + return field_value.get(target_ref); + } else { + return extract_with_reflection(field_value, target_ref); + } + } + // Bracket notation: [index] or ["field"] + else if constexpr (path_view[PathPos] == '[') { + constexpr auto bracket_info = parse_bracket(PathPos); + constexpr bool is_field = std::get<0>(bracket_info); + constexpr std::size_t next_pos = std::get<2>(bracket_info); + + if constexpr (is_field) { + constexpr std::string_view field_name = std::get<1>(bracket_info); + constexpr auto member_info = find_member_by_name(CurrentType, field_name); + static_assert(member_info != ^^void, "Field not found in struct"); + constexpr auto member_type = std::meta::type_of(member_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + auto obj = obj_result.value_unsafe(); + auto field_value = obj.find_field_unordered(field_name); + + if constexpr (next_pos >= path_view.size()) { + return field_value.get(target_ref); + } else { + return extract_with_reflection(field_value, target_ref); + } + } else { + constexpr std::size_t index = std::get<3>(bracket_info); + constexpr auto elem_type = get_element_type_reflected(CurrentType); + static_assert(elem_type != ^^void, "Could not determine array element type"); + + auto arr_result = current.get_array(); + if (arr_result.error()) return arr_result.error(); + auto arr = arr_result.value_unsafe(); + auto elem_value = arr.at(index); + + if constexpr (next_pos >= path_view.size()) { + return elem_value.get(target_ref); + } else { + return extract_with_reflection(elem_value, target_ref); + } + } + } + // Skip unexpected characters and continue + else { + return extract_with_reflection(current, target_ref); + } } - auto result = numberparsing::parse_unsigned_in_string(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("uint64"); + + // Find member by name in reflected type + static consteval std::meta::info find_member_by_name(std::meta::info type_refl, std::string_view name) { + auto members = std::meta::nonstatic_data_members_of(type_refl, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::identifier_of(mem) == name) { + return mem; + } + } } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("int64"); - uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; + + // Generate compile-time accessor code by walking the path + template + static inline simdjson_result access_impl(simdjson_result current) noexcept { + if (current.error()) return current; + + if constexpr (PathPos >= path_view.size()) { + return current; + } else if constexpr (path_view[PathPos] == '.') { + constexpr auto field_info = parse_next_field(PathPos); + constexpr std::string_view field_name = std::get<0>(field_info); + constexpr std::size_t next_pos = std::get<1>(field_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + + auto obj = obj_result.value_unsafe(); + auto next_value = obj.find_field_unordered(field_name); + + return access_impl(next_value); + + } else if constexpr (path_view[PathPos] == '[') { + constexpr auto bracket_info = parse_bracket(PathPos); + constexpr bool is_field = std::get<0>(bracket_info); + constexpr std::size_t next_pos = std::get<2>(bracket_info); + + if constexpr (is_field) { + constexpr std::string_view field_name = std::get<1>(bracket_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + + auto obj = obj_result.value_unsafe(); + auto next_value = obj.find_field_unordered(field_name); + + return access_impl(next_value); + + } else { + constexpr std::size_t index = std::get<3>(bracket_info); + + auto arr_result = current.get_array(); + if (arr_result.error()) return arr_result.error(); + + auto arr = arr_result.value_unsafe(); + auto next_value = arr.at(index); + + return access_impl(next_value); + } + } else { + return access_impl(current); + } } - auto result = numberparsing::parse_integer(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("int64"); + // Parse next field name + static consteval auto parse_next_field(std::size_t start) { + std::size_t i = start + 1; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != '.' && path_view[i] != '[') { + ++i; + } + std::string_view field_name = path_view.substr(field_start, i - field_start); + return std::make_tuple(field_name, i); } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("int64"); - uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; + + // Parse bracket notation: returns (is_field, field_name, next_pos, index) + static consteval auto parse_bracket(std::size_t start) { + std::size_t i = start + 1; // skip '[' + + if (i < path_view.size() && (path_view[i] == '"' || path_view[i] == '\'')) { + // Field access + char quote = path_view[i]; + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != quote) { + ++i; + } + std::string_view field_name = path_view.substr(field_start, i - field_start); + if (i < path_view.size()) ++i; // skip closing quote + if (i < path_view.size() && path_view[i] == ']') ++i; + + return std::make_tuple(true, field_name, i, std::size_t(0)); + } else { + // Array index + std::size_t index = 0; + while (i < path_view.size() && path_view[i] >= '0' && path_view[i] <= '9') { + index = index * 10 + (path_view[i] - '0'); + ++i; + } + if (i < path_view.size() && path_view[i] == ']') ++i; + + return std::make_tuple(false, std::string_view{}, i, index); + } } - auto result = numberparsing::parse_integer_in_string(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("int64"); +public: + // Check if reflected type is array-like (C-style array or indexable container) + // Uses reflection to test: 1) std::meta::is_array_type() for C arrays + // 2) std::meta::substitute() to test concepts::indexable_container concept + static consteval bool is_array_like_reflected(std::meta::info type_reflection) { + if (std::meta::is_array_type(type_reflection)) { + return true; + } + + if (std::meta::can_substitute(^^concepts::indexable_container_v, {type_reflection})) { + return std::meta::extract(std::meta::substitute(^^concepts::indexable_container_v, {type_reflection})); + } + return false; } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("double"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); - return NUMBER_ERROR; + + // Extract element type from reflected array or container + // For C arrays: uses std::meta::remove_extent() + // For containers: finds value_type member using std::meta::members_of() + static consteval std::meta::info get_element_type_reflected(std::meta::info type_reflection) { + if (std::meta::is_array_type(type_reflection)) { + return std::meta::remove_extent(type_reflection); + } + + auto members = std::meta::members_of(type_reflection, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::is_type(mem)) { + auto name = std::meta::identifier_of(mem); + if (name == "value_type") { + return mem; + } + } + } + return ^^void; } - auto result = numberparsing::parse_double(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("double"); + +private: + // Check if type has member with given name + template + static consteval bool has_member(std::string_view member_name) { + constexpr auto members = std::meta::nonstatic_data_members_of(^^Type, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::identifier_of(mem) == member_name) { + return true; + } + } + return false; } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("double"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); - return NUMBER_ERROR; + // Get type of member by name + template + static consteval auto get_member_type(std::string_view member_name) { + constexpr auto members = std::meta::nonstatic_data_members_of(^^Type, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::identifier_of(mem) == member_name) { + return std::meta::type_of(mem); + } + } + return ^^void; } - auto result = numberparsing::parse_double_in_string(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("double"); + + // Check if non-reflected type is array-like + template + static consteval bool is_container_type() { + using BaseType = std::remove_cvref_t; + if constexpr (requires { typename BaseType::value_type; }) { + return true; + } + if constexpr (std::is_array_v) { + return true; + } + return false; } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("bool"); - // We have a boolean if we have either "true" or "false" and the next character is either - // a structural character or whitespace. We also check that the length is correct: - // "true" and "false" are 4 and 5 characters long, respectively. - bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && - (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); - bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && - (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); - if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("bool"); - return value_true; -} + // Extract element type from non-reflected container + template + using extract_element_type = std::conditional_t< + requires { typename std::remove_cvref_t::value_type; }, + typename std::remove_cvref_t::value_type, + std::conditional_t< + std::is_array_v>, + std::remove_extent_t>, + void + > + >; + + // Validate path matches struct definition + static consteval bool validate_path() { + if constexpr (!std::is_class_v) { + return true; + } -simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("null"); - bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && - (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); - if(result) { // we have something that looks like a null. - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("null"); - } else if (json[0] == 'n') { - return incorrect_type_error("Not a null but starts with n"); - } - return result; -} + auto current_type = ^^T; + std::size_t i = parser.skip_root(); -simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); + while (i < path_view.size()) { + if (path_view[i] == '.') { + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != '.' && path_view[i] != '[') { + ++i; + } - return _json_iter->skip_child(depth()); -} + std::string_view field_name = path_view.substr(field_start, i - field_start); -simdjson_inline value_iterator value_iterator::child() const noexcept { - assert_at_child(); - return { _json_iter, depth()+1, _json_iter->token.position() }; -} + bool found = false; + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); -// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller -// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is -// marked non-inline. -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_inline bool value_iterator::is_open() const noexcept { - return _json_iter->depth() >= depth(); -} -SIMDJSON_POP_DISABLE_WARNINGS + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + found = true; + break; + } + } -simdjson_inline bool value_iterator::at_end() const noexcept { - return _json_iter->at_end(); -} + if (!found) { + return false; + } -simdjson_inline bool value_iterator::at_start() const noexcept { - return _json_iter->token.position() == start_position(); -} + } else if (path_view[i] == '[') { + ++i; + if (i >= path_view.size()) return false; -simdjson_inline bool value_iterator::at_first_field() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - return _json_iter->token.position() == start_position() + 1; -} + if (path_view[i] == '"' || path_view[i] == '\'') { + char quote = path_view[i]; + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != quote) { + ++i; + } -simdjson_inline void value_iterator::abandon() noexcept { - _json_iter->abandon(); -} + std::string_view field_name = path_view.substr(field_start, i - field_start); + if (i < path_view.size()) ++i; + if (i < path_view.size() && path_view[i] == ']') ++i; -simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { - return _depth; -} -simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { - return _json_iter->error; -} -simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { - return _json_iter->string_buf_loc(); -} -simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { - return *_json_iter; -} -simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { - return *_json_iter; -} + bool found = false; + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); -simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { - return _json_iter->peek(start_position()); -} -simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { - return _json_iter->peek_length(start_position()); + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + found = true; + break; + } + } + + if (!found) { + return false; + } + + } else { + while (i < path_view.size() && path_view[i] >= '0' && path_view[i] <= '9') { + ++i; + } + + if (i < path_view.size() && path_view[i] == ']') ++i; + + if (!is_array_like_reflected(current_type)) { + return false; + } + + auto new_type = get_element_type_reflected(current_type); + + if (new_type == ^^void) { + return false; + } + + current_type = new_type; + } + } else { + ++i; + } + } + + return true; + } +}; + +// Compile-time path accessor with validation +template +inline simdjson_result<::simdjson::lsx::ondemand::value> at_path_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = path_accessor; + return accessor::access(doc_or_val); } -simdjson_inline uint32_t value_iterator::peek_root_length() const noexcept { - return _json_iter->peek_root_length(start_position()); + +// Overload without type parameter (no validation) +template +inline simdjson_result<::simdjson::lsx::ondemand::value> at_path_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = path_accessor; + return accessor::access(doc_or_val); } -simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - // If we're not at the position anymore, we don't want to advance the cursor. - if (!is_at_start()) { return peek_start(); } +// ============================================================================ +// JSON Pointer Compile-Time Support (RFC 6901) +// ============================================================================ - // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. - assert_at_start(); - return _json_iter->peek(); -} +// JSON Pointer parser: /field/0/nested (slash-separated) +template +struct json_pointer_parser { + static constexpr std::string_view pointer_str = Pointer.view(); -simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - // If we're not at the position anymore, we don't want to advance the cursor. - if (!is_at_start()) { return; } + // Unescape token: ~0 -> ~, ~1 -> / + static consteval void unescape_token(std::string_view src, char* dest, std::size_t& out_len) { + out_len = 0; + for (std::size_t i = 0; i < src.size(); ++i) { + if (src[i] == '~' && i + 1 < src.size()) { + if (src[i + 1] == '0') { + dest[out_len++] = '~'; + ++i; + } else if (src[i + 1] == '1') { + dest[out_len++] = '/'; + ++i; + } else { + dest[out_len++] = src[i]; + } + } else { + dest[out_len++] = src[i]; + } + } + } - // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. - assert_at_start(); - _json_iter->return_current_and_advance(); - _json_iter->ascend_to(depth()-1); -} + // Check if token is numeric + static consteval bool is_numeric(std::string_view token) { + if (token.empty()) return false; + if (token[0] == '0' && token.size() > 1) return false; + for (char c : token) { + if (c < '0' || c > '9') return false; + } + return true; + } -simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { - logger::log_start_value(*_json_iter, start_position(), depth(), type); - // If we're not at the position anymore, we don't want to advance the cursor. - const uint8_t *json; - if (!is_at_start()) { -#if SIMDJSON_DEVELOPMENT_CHECKS - if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } -#endif - json = peek_start(); - if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } - } else { - assert_at_start(); - /** - * We should be prudent. Let us peek. If it is not the right type, we - * return an error. Only once we have determined that we have the right - * type are we allowed to advance! - */ - json = _json_iter->peek(); - if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } - _json_iter->return_current_and_advance(); + // Parse numeric token to index + static consteval std::size_t parse_index(std::string_view token) { + std::size_t result = 0; + for (char c : token) { + result = result * 10 + (c - '0'); + } + return result; } + // Count tokens in pointer + static consteval std::size_t count_tokens() { + if (pointer_str.empty() || pointer_str == "/") return 0; - return SUCCESS; -} + std::size_t count = 0; + std::size_t pos = pointer_str[0] == '/' ? 1 : 0; + while (pos < pointer_str.size()) { + ++count; + std::size_t next_slash = pointer_str.find('/', pos); + if (next_slash == std::string_view::npos) break; + pos = next_slash + 1; + } -simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return peek_start(); } + return count; + } - assert_at_root(); - return _json_iter->peek(); -} -simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return peek_start(); } + // Get Nth token + static consteval std::string_view get_token(std::size_t token_index) { + std::size_t pos = pointer_str[0] == '/' ? 1 : 0; + std::size_t current_token = 0; - assert_at_non_root_start(); - return _json_iter->peek(); -} + while (current_token < token_index) { + std::size_t next_slash = pointer_str.find('/', pos); + pos = next_slash + 1; + ++current_token; + } -simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return; } + std::size_t token_end = pointer_str.find('/', pos); + if (token_end == std::string_view::npos) token_end = pointer_str.size(); - assert_at_root(); - _json_iter->return_current_and_advance(); - _json_iter->ascend_to(depth()-1); -} -simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return; } + return pointer_str.substr(pos, token_end - pos); + } +}; - assert_at_non_root_start(); - _json_iter->return_current_and_advance(); - _json_iter->ascend_to(depth()-1); -} +// JSON Pointer accessor +template +struct pointer_accessor { + using parser = json_pointer_parser; + static constexpr std::string_view pointer_view = Pointer.view(); + static constexpr std::size_t token_count = parser::count_tokens(); -simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { - logger::log_error(*_json_iter, start_position(), depth(), message); - return INCORRECT_TYPE; -} + // Validate pointer against struct definition + static consteval bool validate_pointer() { + if constexpr (!std::is_class_v) { + return true; + } -simdjson_inline bool value_iterator::is_at_start() const noexcept { - return position() == start_position(); -} + auto current_type = ^^T; + std::size_t pos = pointer_view[0] == '/' ? 1 : 0; -simdjson_inline bool value_iterator::is_at_key() const noexcept { - // Keys are at the same depth as the object. - // Note here that we could be safer and check that we are within an object, - // but we do not. - return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; -} + while (pos < pointer_view.size()) { + // Extract token up to next / + std::size_t token_end = pointer_view.find('/', pos); + if (token_end == std::string_view::npos) token_end = pointer_view.size(); -simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { - // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). - auto delta = position() - start_position(); - return delta == 1 || delta == 2; -} + std::string_view token = pointer_view.substr(pos, token_end - pos); -inline void value_iterator::assert_at_start() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); - SIMDJSON_ASSUME( _depth > 0 ); -} + if (parser::is_numeric(token)) { + if (!path_accessor::is_array_like_reflected(current_type)) { + return false; + } + current_type = path_accessor::get_element_type_reflected(current_type); + } else { + bool found = false; + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == token) { + current_type = std::meta::type_of(mem); + found = true; + break; + } + } -inline void value_iterator::assert_at_container_start() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); - SIMDJSON_ASSUME( _depth > 0 ); -} + if (!found) return false; + } -inline void value_iterator::assert_at_next() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); - SIMDJSON_ASSUME( _depth > 0 ); -} + pos = token_end + 1; + } -simdjson_inline void value_iterator::move_at_start() noexcept { - _json_iter->_depth = _depth; - _json_iter->token.set_position(_start_position); -} + return true; + } -simdjson_inline void value_iterator::move_at_container_start() noexcept { - _json_iter->_depth = _depth; - _json_iter->token.set_position(_start_position + 1); -} + // Recursive accessor + template + static inline simdjson_result access_impl(simdjson_result current) noexcept { + if constexpr (TokenIndex >= token_count) { + return current; + } else { + constexpr std::string_view token = parser::get_token(TokenIndex); -simdjson_inline simdjson_result value_iterator::reset_array() noexcept { - if(error()) { return error(); } - move_at_container_start(); - return started_array(); -} + if constexpr (parser::is_numeric(token)) { + constexpr std::size_t index = parser::parse_index(token); + auto arr = current.get_array().value_unsafe(); + auto next_value = arr.at(index); + return access_impl(next_value); + } else { + auto obj = current.get_object().value_unsafe(); + auto next_value = obj.find_field_unordered(token); + return access_impl(next_value); + } + } + } -simdjson_inline simdjson_result value_iterator::reset_object() noexcept { - if(error()) { return error(); } - move_at_container_start(); - return started_object(); -} + // Access JSON value at pointer + template + static inline simdjson_result access(DocOrValue& doc_or_val) noexcept { + if constexpr (std::is_class_v) { + constexpr bool pointer_valid = validate_pointer(); + static_assert(pointer_valid, "JSON Pointer does not match struct definition"); + } -inline void value_iterator::assert_at_child() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); - SIMDJSON_ASSUME( _depth > 0 ); -} + if (pointer_view.empty() || pointer_view == "/") { + if constexpr (requires { doc_or_val.get_value(); }) { + return doc_or_val.get_value(); + } else { + return doc_or_val; + } + } -inline void value_iterator::assert_at_root() const noexcept { - assert_at_start(); - SIMDJSON_ASSUME( _depth == 1 ); -} + simdjson_result current = doc_or_val.get_value(); + return access_impl<0>(current); + } -inline void value_iterator::assert_at_non_root_start() const noexcept { - assert_at_start(); - SIMDJSON_ASSUME( _depth > 1 ); -} + // Extract value at pointer directly into target with type validation + template + static inline error_code extract_field(DocOrValue& doc_or_val, FieldType& target) noexcept { + static_assert(std::is_class_v, "extract_field requires T to be a struct type for validation"); -inline void value_iterator::assert_is_valid() const noexcept { - SIMDJSON_ASSUME( _json_iter != nullptr ); -} + constexpr bool pointer_valid = validate_pointer(); + static_assert(pointer_valid, "JSON Pointer does not match struct definition"); -simdjson_inline bool value_iterator::is_valid() const noexcept { - return _json_iter != nullptr; -} + constexpr auto final_type = get_final_type(); + static_assert(final_type == ^^FieldType, "Target type does not match the field type at the pointer"); -simdjson_inline simdjson_result value_iterator::type() const noexcept { - switch (*peek_start()) { - case '{': - return json_type::object; - case '[': - return json_type::array; - case '"': - return json_type::string; - case 'n': - return json_type::null; - case 't': case 'f': - return json_type::boolean; - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return json_type::number; - default: - return TAPE_ERROR; + simdjson_result current_value = doc_or_val.get_value(); + auto json_value = access_impl<0>(current_value); + if (json_value.error()) return json_value.error(); + + return json_value.get(target); } -} -simdjson_inline token_position value_iterator::start_position() const noexcept { - return _start_position; -} +private: + // Get final type by walking pointer through struct + template + static consteval std::enable_if_t, std::meta::info> get_final_type() { + auto current_type = ^^T; + std::size_t pos = pointer_view[0] == '/' ? 1 : 0; -simdjson_inline token_position value_iterator::position() const noexcept { - return _json_iter->position(); -} + while (pos < pointer_view.size()) { + std::size_t token_end = pointer_view.find('/', pos); + if (token_end == std::string_view::npos) token_end = pointer_view.size(); -simdjson_inline token_position value_iterator::end_position() const noexcept { - return _json_iter->end_position(); -} + std::string_view token = pointer_view.substr(pos, token_end - pos); -simdjson_inline token_position value_iterator::last_position() const noexcept { - return _json_iter->last_position(); + if (parser::is_numeric(token)) { + current_type = path_accessor::get_element_type_reflected(current_type); + } else { + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == token) { + current_type = std::meta::type_of(mem); + break; + } + } + } + + pos = token_end + 1; + } + + return current_type; + } +}; + +// Compile-time JSON Pointer accessor with validation +template +inline simdjson_result<::simdjson::lsx::ondemand::value> at_pointer_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = pointer_accessor; + return accessor::access(doc_or_val); } -simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { - return _json_iter->report_error(error, message); +// Overload without type parameter (no validation) +template +inline simdjson_result<::simdjson::lsx::ondemand::value> at_pointer_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = pointer_accessor; + return accessor::access(doc_or_val); } +} // namespace json_path } // namespace ondemand } // namespace lsx } // namespace simdjson -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result(lsx::ondemand::value_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/value_iterator-inl.h for lsx */ +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION +#endif // SIMDJSON_GENERIC_ONDEMAND_COMPILE_TIME_ACCESSORS_H +/* end file simdjson/generic/ondemand/compile_time_accessors.h for lsx */ /* end file simdjson/generic/ondemand/amalgamated.h for lsx */ /* including simdjson/lsx/end.h: #include "simdjson/lsx/end.h" */ @@ -112486,7 +138869,7 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } simdjson_inline bool has_backslash() { return bs_bits != 0; } @@ -112509,6 +138892,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + (is_backslash | is_quote | is_control).to_bitmask() + }; +} + } // unnamed namespace } // namespace lasx } // namespace simdjson @@ -112577,7 +138985,7 @@ class value_iterator; /* end file simdjson/generic/ondemand/base.h for lasx */ /* including simdjson/generic/ondemand/deserialize.h for lasx: #include "simdjson/generic/ondemand/deserialize.h" */ /* begin file simdjson/generic/ondemand/deserialize.h for lasx */ -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS #ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -112586,55 +138994,8 @@ class value_iterator; /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include namespace simdjson { -namespace tag_invoke_fn_ns { -void tag_invoke(); - -struct tag_invoke_fn { - template - requires requires(Tag tag, Args &&...args) { - tag_invoke(std::forward(tag), std::forward(args)...); - } - constexpr auto operator()(Tag tag, Args &&...args) const - noexcept(noexcept(tag_invoke(std::forward(tag), - std::forward(args)...))) - -> decltype(tag_invoke(std::forward(tag), - std::forward(args)...)) { - return tag_invoke(std::forward(tag), std::forward(args)...); - } -}; -} // namespace tag_invoke_fn_ns - -inline namespace tag_invoke_ns { -inline constexpr tag_invoke_fn_ns::tag_invoke_fn tag_invoke = {}; -} // namespace tag_invoke_ns - -template -concept tag_invocable = requires(Tag tag, Args... args) { - tag_invoke(std::forward(tag), std::forward(args)...); -}; - -template -concept nothrow_tag_invocable = - tag_invocable && requires(Tag tag, Args... args) { - { - tag_invoke(std::forward(tag), std::forward(args)...) - } noexcept; - }; - -template -using tag_invoke_result = - std::invoke_result; - -template -using tag_invoke_result_t = - std::invoke_result_t; - -template using tag_t = std::decay_t; - - struct deserialize_tag; /// These types are deserializable in a built-in way @@ -112656,7 +139017,7 @@ template concept custom_deserializable = tag_invocable; template -concept deserializable = custom_deserializable || is_builtin_deserializable_v; +concept deserializable = custom_deserializable || is_builtin_deserializable_v || concepts::optional_type; template concept nothrow_custom_deserializable = nothrow_tag_invocable; @@ -112667,28 +139028,44 @@ concept nothrow_deserializable = nothrow_custom_deserializable || is_bu /// Deserialize Tag inline constexpr struct deserialize_tag { + using array_type = lasx::ondemand::array; + using object_type = lasx::ondemand::object; using value_type = lasx::ondemand::value; using document_type = lasx::ondemand::document; using document_reference_type = lasx::ondemand::document_reference; + // Customization Point for array + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(array_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for object + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(object_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + // Customization Point for value template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } // Customization Point for document template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } // Customization Point for document reference template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } @@ -112698,7 +139075,7 @@ inline constexpr struct deserialize_tag { } // namespace simdjson #endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS /* end file simdjson/generic/ondemand/deserialize.h for lasx */ /* including simdjson/generic/ondemand/value_iterator.h for lasx: #include "simdjson/generic/ondemand/value_iterator.h" */ @@ -113040,7 +139417,7 @@ class value_iterator { simdjson_warn_unused simdjson_inline simdjson_result get_root_number(bool check_trailing) noexcept; simdjson_warn_unused simdjson_inline simdjson_result is_root_null(bool check_trailing) noexcept; - simdjson_inline error_code error() const noexcept; + simdjson_warn_unused simdjson_inline error_code error() const noexcept; simdjson_inline uint8_t *&string_buf_loc() noexcept; simdjson_inline const json_iterator &json_iter() const noexcept; simdjson_inline json_iterator &json_iter() noexcept; @@ -113124,8 +139501,8 @@ class value_iterator { simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; - simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; - simdjson_inline error_code end_container() noexcept; + simdjson_warn_unused simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container() noexcept; /** * Advance to a place expecting a value (increasing depth). @@ -113135,8 +139512,8 @@ class value_iterator { */ simdjson_inline simdjson_result advance_to_value() noexcept; - simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; - simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; + simdjson_warn_unused simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; + simdjson_warn_unused simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; simdjson_inline bool is_at_start() const noexcept; /** @@ -113173,7 +139550,7 @@ class value_iterator { /** @copydoc error_code json_iterator::end_position() const noexcept; */ simdjson_inline token_position end_position() const noexcept; /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ - simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + simdjson_warn_unused simdjson_inline error_code report_error(error_code error, const char *message) noexcept; friend class document; friend class object; @@ -113210,6 +139587,7 @@ struct simdjson_result : public lasx::implementa /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include @@ -113238,13 +139616,14 @@ class value { * * You may use get_double(), get_bool(), get_uint64(), get_int64(), * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * When SIMDJSON_SUPPORTS_CONCEPTS is set, custom types are also supported. * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ template simdjson_inline simdjson_result get() -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -113261,22 +139640,38 @@ class value { * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * If the macro SIMDJSON_SUPPORTS_CONCEPTS is set, then custom types are also supported. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template - simdjson_inline error_code get(T &out) -#if SIMDJSON_SUPPORTS_DESERIALIZATION + simdjson_warn_unused simdjson_inline error_code get(T &out) +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { - #if SIMDJSON_SUPPORTS_DESERIALIZATION + #if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); + } else if constexpr (concepts::optional_type) { + using value_type = typename std::remove_cvref_t::value_type; + + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullopt + return SUCCESS; + } + + if (!out) { + out.emplace(); + } + return get(out.value()); } else { static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " "And you do not seem to have added support for it. Indeed, we have that " @@ -113286,7 +139681,7 @@ class value { static_cast(out); // to get rid of unused errors return UNINITIALIZED; } -#else // SIMDJSON_SUPPORTS_DESERIALIZATION +#else // SIMDJSON_SUPPORTS_CONCEPTS // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -113404,7 +139799,7 @@ class value { * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. */ template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; /** * Cast this JSON value to a "wobbly" string. @@ -113581,12 +139976,14 @@ class value { */ simdjson_inline simdjson_result at(size_t index) noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -113615,7 +140012,8 @@ class value { * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the default because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -113858,6 +140256,14 @@ class value { */ simdjson_inline simdjson_result at_path(std::string_view at_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * Supports wildcard character (*) for arrays or ".*" for objects. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern + */ + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; protected: /** @@ -113925,7 +140331,7 @@ struct simdjson_result : public lasx::implementation_simd simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -113954,12 +140360,14 @@ struct simdjson_result : public lasx::implementation_simd simdjson_inline simdjson_result end() & noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -113986,7 +140394,8 @@ struct simdjson_result : public lasx::implementation_simd * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the defaul because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -114006,7 +140415,22 @@ struct simdjson_result : public lasx::implementation_simd simdjson_result operator[](int) noexcept = delete; /** - * Get the type of this JSON value. + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * Given a valid JSON document, the answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just @@ -114030,6 +140454,7 @@ struct simdjson_result : public lasx::implementation_simd simdjson_inline simdjson_result current_depth() const noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; }; } // namespace simdjson @@ -114500,14 +140925,14 @@ class json_iterator { * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ - simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + simdjson_warn_unused simdjson_inline error_code report_error(error_code error, const char *message) noexcept; /** * Log error, but don't stop iteration. * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ - simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; + simdjson_warn_unused simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; /** * Take an input in json containing max_len characters and attempt to copy it over to tmpbuf, a buffer with @@ -114527,7 +140952,7 @@ class json_iterator { simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; - simdjson_inline error_code consume_character(char c) noexcept; + simdjson_warn_unused simdjson_inline error_code consume_character(char c) noexcept; #if SIMDJSON_DEVELOPMENT_CHECKS simdjson_inline token_position start_position(depth_t depth) const noexcept; simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; @@ -114618,6 +141043,7 @@ namespace ondemand { * The type of a JSON value. */ enum class json_type { + unknown=0, // Start at 1 to catch uninitialized / default values more easily array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) object, ///< A JSON object ( { "a": 1, "b" 2, ... } ) @@ -114824,6 +141250,12 @@ class raw_json_string { */ simdjson_inline const char * raw() const noexcept; + /** + * Get the character at index i. This is unchecked. + * [0] when the string is of length 0 returns the final quote ("). + */ + simdjson_inline char operator[](size_t i) const noexcept; + /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done) on target.size() characters, @@ -114963,10 +141395,10 @@ struct simdjson_result : public lasx::implement simdjson_inline ~simdjson_result() noexcept = default; ///< @private simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline char operator[](size_t) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape(lasx::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(lasx::ondemand::json_iterator &iter) const noexcept; }; - } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H @@ -114982,6 +141414,7 @@ struct simdjson_result : public lasx::implement /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include +#include namespace simdjson { namespace lasx { @@ -115100,7 +141533,9 @@ class parser { simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; - /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + /** @overload simdjson_result iterate(padded_string_view json) & noexcept + The string instance might be have its capacity extended. Note that this can still + result in AddressSanitizer: container-overflow in some cases. */ simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; @@ -115188,6 +141623,11 @@ class parser { * Setting batch_size to excessively large or excessively small values may impact negatively the * performance. * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what @@ -115195,10 +141635,10 @@ class parser { * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * - * ### Threads + * This is checked automatically with all iterate_many function calls, except for the two + * that take pointers (const char* or const uint8_t*). * - * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the - * hood to do some lookahead. + * ### Threads * * ### Parser Capacity * @@ -115224,14 +141664,16 @@ class parser { */ inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(padded_string_view json, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) + the string might be automatically padded with up to SIMDJSON_PADDING whitespace characters */ + inline simdjson_result iterate_many(std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe - /** @private We do not want to allow implicit conversion from C string to std::string. */ simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; @@ -115333,13 +141775,39 @@ class parser { bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; #endif + /** + * Get a unique parser instance corresponding to the current thread. + * This instance can be safely used within the current thread, but it should + * not be passed to other threads. + * + * A parser should only be used for one document at a time. + * + * Our simdjson::from functions use this parser instance. + * + * You can free the related parser by calling release_parser(). + */ + static simdjson_inline simdjson_warn_unused ondemand::parser& get_parser(); + /** + * Release the parser instance initialized by get_parser() and all the + * associated resources (memory). Returns true if a parser instance + * was released. + */ + static simdjson_inline bool release_parser(); + private: + friend bool release_parser(); + friend ondemand::parser& get_parser(); + /** Get the thread-local parser instance, allocates it if needed */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_parser_instance(); + /** Get the thread-local parser instance, it might be null */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_threadlocal_parser_if_exists(); /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; std::unique_ptr string_buf{}; + #if SIMDJSON_DEVELOPMENT_CHECKS std::unique_ptr start_positions{}; #endif @@ -115367,6 +141835,314 @@ struct simdjson_result : public lasx::implementation_sim #endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H /* end file simdjson/generic/ondemand/parser.h for lasx */ +// JSON builder - needed for extract_into functionality +/* including simdjson/generic/ondemand/json_string_builder.h for lasx: #include "simdjson/generic/ondemand/json_string_builder.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder.h for lasx */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + + +#if SIMDJSON_SUPPORTS_CONCEPTS + +namespace lasx { +namespace builder { + class string_builder; +}} + +template +struct has_custom_serialization : std::false_type {}; + +inline constexpr struct serialize_tag { + template + constexpr void operator()(lasx::builder::string_builder& b, T&& obj) const{ + return tag_invoke(*this, b, std::forward(obj)); + } + + +} serialize{}; +template +struct has_custom_serialization(), std::declval())) +>> : std::true_type {}; + +template +constexpr bool require_custom_serialization = has_custom_serialization::value; +#else +struct has_custom_serialization : std::false_type {}; +#endif // SIMDJSON_SUPPORTS_CONCEPTS + +namespace lasx { +namespace builder { +/** + * A builder for JSON strings representing documents. This is a low-level + * builder that is not meant to be used directly by end-users. Though it + * supports atomic types (Booleans, strings), it does not support composed + * types (arrays and objects). + * + * Ultimately, this class can support kernel-specific optimizations. E.g., + * it may make use of SIMD instructions to escape strings faster. + */ +class string_builder { +public: + simdjson_inline string_builder(size_t initial_capacity = DEFAULT_INITIAL_CAPACITY); + + static constexpr size_t DEFAULT_INITIAL_CAPACITY = 1024; + + /** + * Append number (includes Booleans). Booleans are mapped to the strings + * false and true. Numbers are converted to strings abiding by the JSON standard. + * Floating-point numbers are converted to the shortest string that 'correctly' + * represents the number. + */ + template::value>::type> + simdjson_inline void append(number_type v) noexcept; + + /** + * Append character c. + */ + simdjson_inline void append(char c) noexcept; + + /** + * Append the string 'null'. + */ + simdjson_inline void append_null() noexcept; + + /** + * Clear the content. + */ + simdjson_inline void clear() noexcept; + + /** + * Append the std::string_view, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append(std::string_view input) noexcept; + + /** + * Append the std::string_view surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(std::string_view input) noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + template + simdjson_inline void escape_and_append_with_quotes() noexcept; +#endif + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(char input) noexcept; + + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(const char* input) noexcept; + + /** + * Append the C string directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *c) noexcept; + + /** + * Append "{" to the buffer. + */ + simdjson_inline void start_object() noexcept; + + /** + * Append "}" to the buffer. + */ + simdjson_inline void end_object() noexcept; + + /** + * Append "[" to the buffer. + */ + simdjson_inline void start_array() noexcept; + + /** + * Append "]" to the buffer. + */ + simdjson_inline void end_array() noexcept; + + /** + * Append "," to the buffer. + */ + simdjson_inline void append_comma() noexcept; + + /** + * Append ":" to the buffer. + */ + simdjson_inline void append_colon() noexcept; + + /** + * Append a key-value pair to the buffer. + * The key is escaped and surrounded by double quotes. + * The value is escaped if it is a string. + */ + template + simdjson_inline void append_key_value(key_type key, value_type value) noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + template + simdjson_inline void append_key_value(value_type value) noexcept; + + // Support for optional types (std::optional, etc.) + template + requires(!require_custom_serialization) + simdjson_inline void append(const T &opt); + + template + requires(require_custom_serialization) + simdjson_inline void append(T &&val); + + // Support for string-like types + template + requires(std::is_convertible::value || + std::is_same::value ) + simdjson_inline void append(const T &value); +#endif +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS + // Support for range-based appending (std::ranges::view, etc.) + template +requires (!std::is_convertible::value && !require_custom_serialization) + simdjson_inline void append(const R &range) noexcept; +#endif + /** + * Append the std::string_view directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(std::string_view input) noexcept; + + /** + * Append len characters from str. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *str, size_t len) noexcept; +#if SIMDJSON_EXCEPTIONS + /** + * Creates an std::string from the written JSON buffer. + * Throws if memory allocation failed + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string() const noexcept(false); + + /** + * Creates an std::string_view from the written JSON buffer. + * Throws if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string_view() const noexcept(false) simdjson_lifetime_bound; +#endif + + /** + * Returns a view on the written JSON buffer. Returns an error + * if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result view() const noexcept; + + /** + * Appends the null character to the buffer and returns + * a pointer to the beginning of the written JSON buffer. + * Returns an error if memory allocation failed. + * The result is null-terminated. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result c_str() noexcept; + + /** + * Return true if the content is valid UTF-8. + */ + simdjson_inline bool validate_unicode() const noexcept; + + /** + * Returns the current size of the written JSON buffer. + * If an error occurred, returns 0. + */ + simdjson_inline size_t size() const noexcept; + +private: + /** + * Returns true if we can write at least upcoming_bytes bytes. + * The underlying buffer is reallocated if needed. It is designed + * to be called before writing to the buffer. It should be fast. + */ + simdjson_inline bool capacity_check(size_t upcoming_bytes); + + /** + * Grow the buffer to at least desired_capacity bytes. + * If the allocation fails, is_valid is set to false. We expect + * that this function would not be repeatedly called. + */ + simdjson_inline void grow_buffer(size_t desired_capacity); + + /** + * We use this helper function to make sure that is_valid is kept consistent. + */ + simdjson_inline void set_valid(bool valid) noexcept; + + std::unique_ptr buffer{}; + size_t position{0}; + size_t capacity{0}; + bool is_valid{true}; +}; + + + +} +} + + +#if !SIMDJSON_STATIC_REFLECTION +// fallback implementation until we have static reflection +template +simdjson_warn_unused simdjson_result to_json(const Z &z, size_t initial_capacity = simdjson::lasx::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + simdjson::lasx::builder::string_builder b(initial_capacity); + b.append(z); + std::string_view s; + auto e = b.view().get(s); + if(e) { return e; } + return std::string(s); +} +template +simdjson_warn_unused simdjson_error to_json(const Z &z, std::string &s, size_t initial_capacity = simdjson::lasx::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + simdjson::lasx::builder::string_builder b(initial_capacity); + b.append(z); + std::string_view sv; + auto e = b.view().get(sv); + if(e) { return e; } + s.assign(sv.data(), sv.size()); + return simdjson::SUCCESS; +} +#endif + +#if SIMDJSON_SUPPORTS_CONCEPTS +#endif // SIMDJSON_SUPPORTS_CONCEPTS + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_STRING_BUILDER_H +/* end file simdjson/generic/ondemand/json_string_builder.h for lasx */ + // All other declarations /* including simdjson/generic/ondemand/array.h for lasx: #include "simdjson/generic/ondemand/array.h" */ /* begin file simdjson/generic/ondemand/array.h for lasx */ @@ -115377,6 +142153,7 @@ struct simdjson_result : public lasx::implementation_sim /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { @@ -115479,7 +142256,7 @@ class array { * JSONPath queries that trivially convertible to JSON Pointer queries: key * names and array indices. * - * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * https://www.rfc-editor.org/rfc/rfc9535 (RFC 9535) * * @return The value associated with the given JSONPath expression, or: * - INVALID_JSON_POINTER if the JSONPath to JSON Pointer conversion fails @@ -115489,6 +142266,15 @@ class array { */ inline simdjson_result at_path(std::string_view json_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * Supports wildcard patterns like "[*]" to match all array elements. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern + */ + inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; + /** * Consumes the array and returns a string_view instance corresponding to the * array as represented in JSON. It points inside the original document. @@ -115503,11 +142289,42 @@ class array { * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) noexcept; + +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this array as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON array is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_warn_unused simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the array, no matter where you are right now. */ - simdjson_inline error_code consume() noexcept; + simdjson_warn_unused simdjson_inline error_code consume() noexcept; /** * Begin array iteration. @@ -115580,8 +142397,30 @@ struct simdjson_result : public lasx::implementation_simd simdjson_inline simdjson_result at(size_t index) noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_warn_unused simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -115626,7 +142465,8 @@ class array_iterator { * * Part of the std::iterator interface. */ - simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result + operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. /** * Check if we are at the end of the JSON. * @@ -115650,6 +142490,11 @@ class array_iterator { */ simdjson_inline array_iterator &operator++() noexcept; + /** + * Check if the array is at the end. + */ + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; + private: value_iterator iter{}; @@ -115668,7 +142513,6 @@ namespace simdjson { template<> struct simdjson_result : public lasx::implementation_simdjson_result_base { -public: simdjson_inline simdjson_result(lasx::ondemand::array_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -115681,6 +142525,8 @@ struct simdjson_result : public lasx::implementa simdjson_inline bool operator==(const simdjson_result &) const noexcept; simdjson_inline bool operator!=(const simdjson_result &) const noexcept; simdjson_inline simdjson_result &operator++() noexcept; + + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; }; } // namespace simdjson @@ -115697,6 +142543,7 @@ struct simdjson_result : public lasx::implementa /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/deserialize.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ @@ -115808,7 +142655,7 @@ class document { * @returns INCORRECT_TYPE if the JSON value is not a string. Otherwise, we return SUCCESS. */ template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; /** * Cast this JSON value to a string. * @@ -115874,7 +142721,7 @@ class document { */ template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -115897,7 +142744,7 @@ class document { */ template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -115915,18 +142762,18 @@ class document { * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. - * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns INCORRECT_TYPE If the JSON value is of the given type. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template - simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION + simdjson_warn_unused simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { @@ -115938,7 +142785,7 @@ class document { static_cast(out); // to get rid of unused errors return UNINITIALIZED; } -#else // SIMDJSON_SUPPORTS_DESERIALIZATION +#else // SIMDJSON_SUPPORTS_CONCEPTS // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -115948,7 +142795,7 @@ class document { " You may also add support for custom types, see our documentation."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS } /** @overload template error_code get(T &out) & noexcept */ @@ -116013,7 +142860,7 @@ class document { * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a raw_json_string. * @@ -116022,7 +142869,7 @@ class document { * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a bool. * @@ -116093,12 +142940,14 @@ class document { simdjson_inline simdjson_result end() & noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -116137,7 +142986,8 @@ class document { * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the default because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -116172,11 +143022,27 @@ class document { * E.g., you must still call "is_null()" to check that a value is null even if * "type()" returns json_type::null. * + * The answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. + * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). * - * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + * Prior to simdjson 4.0, this function would return an error given a bad token. + * Starting with simdjson 4.0, it will return simdjson::ondemand::json_type::unknown. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. */ simdjson_inline simdjson_result type() noexcept; @@ -116376,7 +143242,7 @@ class document { * JSONPath queries that trivially convertible to JSON Pointer queries: key * names and array indices. * - * https://datatracker.ietf.org/doc/html/draft-normington-jsonpath-00 + * https://www.rfc-editor.org/rfc/rfc9535 (RFC 9535) * * Key values are matched exactly, without unescaping or Unicode normalization. * We do a byte-by-byte comparison. E.g. @@ -116394,17 +143260,64 @@ class document { */ simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * + * Supports wildcard patterns like "$.array[*]" or "$.object.*" to match multiple elements. + * + * This method materializes all matching values into a vector. + * The document will be consumed after this call. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern, or: + * - INVALID_JSON_POINTER if the JSONPath cannot be parsed + * - NO_SUCH_FIELD if a field does not exist + * - INDEX_OUT_OF_BOUNDS if an array index is out of bounds + * - INCORRECT_TYPE if path traversal encounters wrong type + */ + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; + /** * Consumes the document and returns a string_view instance corresponding to the * document as represented in JSON. It points inside the original byte array containing * the JSON document. */ simdjson_inline simdjson_result raw_json() noexcept; + +#if SIMDJSON_STATIC_REFLECTION + /** + * Extract only specific fields from the JSON object into a struct. + * + * This allows selective deserialization of only the fields you need, + * potentially improving performance by skipping unwanted fields. + * + * Example: + * ```cpp + * struct Car { + * std::string make; + * std::string model; + * int year; + * double price; + * }; + * + * Car car; + * doc.extract_into<"make", "model">(car); + * // Only 'make' and 'model' fields are extracted from JSON + * ``` + * + * @tparam FieldNames Compile-time string literals specifying which fields to extract + * @param out The output struct to populate with selected fields + * @returns SUCCESS on success, or an error code if a required field is missing or has wrong type + */ + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION protected: /** * Consumes the document. */ - simdjson_inline error_code consume() noexcept; + simdjson_warn_unused simdjson_inline error_code consume() noexcept; simdjson_inline document(ondemand::json_iterator &&iter) noexcept; simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; @@ -116457,7 +143370,7 @@ class document_reference { simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -116466,7 +143379,7 @@ class document_reference { simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -116479,7 +143392,7 @@ class document_reference { } template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -116501,14 +143414,14 @@ class document_reference { * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template - simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION + simdjson_warn_unused simdjson_inline error_code get(T &out) & +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { @@ -116520,7 +143433,7 @@ class document_reference { static_cast(out); // to get rid of unused errors return UNINITIALIZED; } -#else // SIMDJSON_SUPPORTS_DESERIALIZATION +#else // SIMDJSON_SUPPORTS_CONCEPTS // Unless the simdjson library or the user provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " @@ -116530,12 +143443,17 @@ class document_reference { " You may also add support for custom types, see our documentation."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS } /** @overload template error_code get(T &out) & noexcept */ template simdjson_inline error_code get(T &out) && noexcept; simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_STATIC_REFLECTION + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION simdjson_inline operator document&() const noexcept; #if SIMDJSON_EXCEPTIONS template @@ -116576,6 +143494,7 @@ class document_reference { simdjson_inline simdjson_result raw_json_token() noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; private: document *doc{nullptr}; @@ -116604,7 +143523,7 @@ struct simdjson_result : public lasx::implementation_s simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -116617,6 +143536,9 @@ struct simdjson_result : public lasx::implementation_s template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS + + using lasx::implementation_simdjson_result_base::operator*; + using lasx::implementation_simdjson_result_base::operator->; template ::value == false>::type> explicit simdjson_inline operator T() noexcept(false); simdjson_inline operator lasx::ondemand::array() & noexcept(false); @@ -116656,6 +143578,12 @@ struct simdjson_result : public lasx::implementation_s simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; +#if SIMDJSON_STATIC_REFLECTION + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION }; @@ -116682,7 +143610,7 @@ struct simdjson_result : public lasx::implem simdjson_inline simdjson_result get_double_in_string() noexcept; simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; template - simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; + simdjson_warn_unused simdjson_inline error_code get_string(string_type& receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result get_wobbly_string() noexcept; simdjson_inline simdjson_result get_raw_json_string() noexcept; simdjson_inline simdjson_result get_bool() noexcept; @@ -116733,6 +143661,12 @@ struct simdjson_result : public lasx::implem simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; +#if SIMDJSON_STATIC_REFLECTION + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION }; @@ -116825,7 +143759,7 @@ class document_stream { /** * Construct an uninitialized document_stream. * - * ```c++ + * ```cpp * document_stream docs; * auto error = parser.iterate_many(json).get(docs); * ``` @@ -116875,6 +143809,7 @@ class document_stream { * Default constructor. */ simdjson_inline iterator() noexcept; + simdjson_inline iterator(const iterator &other) noexcept = default; /** * Get the current document (or error). */ @@ -116888,6 +143823,7 @@ class document_stream { * @param other the end iterator to compare to. */ simdjson_inline bool operator!=(const iterator &other) const noexcept; + simdjson_inline bool operator==(const iterator &other) const noexcept; /** * @private * @@ -116931,6 +143867,11 @@ class document_stream { */ inline error_code error() const noexcept; + /** + * Returns whether the iterator is at the end. + */ + inline bool at_end() const noexcept; + private: simdjson_inline iterator(document_stream *s, bool finished) noexcept; /** The document_stream we're iterating through. */ @@ -116942,6 +143883,7 @@ class document_stream { friend class document_stream; friend class json_iterator; }; + using iterator = document_stream::iterator; /** * Start iterating the documents in the stream. @@ -117205,6 +144147,10 @@ struct simdjson_result : public lasx::implementation_simd /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION && SIMDJSON_SUPPORTS_CONCEPTS */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_string_builder.h" // for constevalutil::fixed_string */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { @@ -117226,12 +144172,14 @@ class object { simdjson_inline simdjson_result begin() noexcept; simdjson_inline simdjson_result end() noexcept; /** - * Look up a field by name on an object (order-sensitive). + * Look up a field by name on an object (order-sensitive). By order-sensitive, we mean that + * fields must be accessed in the order they appear in the JSON text (although you can + * skip fields). See find_field_unordered() and operator[] for an order-insensitive version. * * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the * JSON `{ "x": 1, "y": 2, "z": 3 }`: * - * ```c++ + * ```cpp * simdjson::ondemand::parser parser; * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); * double z = obj.find_field("z"); @@ -117274,7 +144222,8 @@ class object { * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object * in question is large. The fact that the extra code is there also bumps the executable size. * - * It is the default, however, because it would be highly surprising (and hard to debug) if the + * We default operator[] on find_field_unordered() for convenience. + * It is the default because it would be highly surprising (and hard to debug) if the * default behavior failed to look up a field just because it was in the wrong order--and many * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. * @@ -117357,6 +144306,15 @@ class object { */ inline simdjson_result at_path(std::string_view json_path) noexcept; + /** + * Get all values matching the given JSONPath expression with wildcard support. + * Supports wildcard patterns like ".*" to match all object fields. + * + * @param json_path JSONPath expression with wildcards + * @return Vector of values matching the wildcard pattern + */ + inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; + /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you @@ -117402,11 +144360,71 @@ class object { */ simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this object as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON object is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_warn_unused simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } + +#if SIMDJSON_STATIC_REFLECTION + /** + * Extract only specific fields from the JSON object into a struct. + * + * This allows selective deserialization of only the fields you need, + * potentially improving performance by skipping unwanted fields. + * + * Example: + * ```cpp + * struct Car { + * std::string make; + * std::string model; + * int year; + * double price; + * }; + * + * Car car; + * object.extract_into<"make", "model">(car); + * // Only 'make' and 'model' fields are extracted from JSON + * ``` + * + * @tparam FieldNames Compile-time string literals specifying which fields to extract + * @param out The output struct to populate with selected fields + * @returns SUCCESS on success, or an error code if a required field is missing or has wrong type + */ + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) & noexcept; +#endif // SIMDJSON_STATIC_REFLECTION +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the object, no matter where you are right now. */ - simdjson_inline error_code consume() noexcept; + simdjson_warn_unused simdjson_inline error_code consume() noexcept; static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; @@ -117445,12 +144463,43 @@ struct simdjson_result : public lasx::implementation_sim simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; - + simdjson_inline simdjson_result> at_path_with_wildcard(std::string_view json_path) noexcept; inline simdjson_result reset() noexcept; inline simdjson_result is_empty() noexcept; inline simdjson_result count_fields() & noexcept; inline simdjson_result raw_json() noexcept; + #if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_warn_unused simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#if SIMDJSON_STATIC_REFLECTION + // TODO: move this code into object-inl.h + template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) + simdjson_warn_unused simdjson_inline error_code extract_into(T& out) noexcept { + if (error()) { return error(); } + return first.extract_into(out); + } +#endif // SIMDJSON_STATIC_REFLECTION +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -117579,6 +144628,20 @@ inline simdjson_result to_json_string(simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); + +#if SIMDJSON_STATIC_REFLECTION +/** + * Create a JSON string from any user-defined type using static reflection. + * Only available when SIMDJSON_STATIC_REFLECTION is enabled. + */ +template + requires(!std::same_as && + !std::same_as && + !std::same_as && + !std::same_as) +inline std::string to_json_string(const T& obj); +#endif + } // namespace simdjson /** @@ -117650,28 +144713,30 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result #include +#if SIMDJSON_STATIC_REFLECTION +#include +// #include // for std::define_static_string - header not available yet +#endif namespace simdjson { -template -constexpr bool require_custom_serialization = false; ////////////////////////////// // Number deserialization ////////////////////////////// template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { using limits = std::numeric_limits; @@ -117685,7 +144750,6 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { } template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { double x; SIMDJSON_TRY(val.get_double().get(x)); @@ -117694,7 +144758,6 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { } template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { using limits = std::numeric_limits; @@ -117707,8 +144770,23 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { return SUCCESS; } +////////////////////////////// +// String deserialization +////////////////////////////// + +// just a character! +error_code tag_invoke(deserialize_tag, auto &val, char &out) noexcept { + std::string_view x; + SIMDJSON_TRY(val.get_string().get(x)); + if(x.size() != 1) { + return INCORRECT_TYPE; + } + out = x[0]; + return SUCCESS; +} + +// any string-like type (can be constructed from std::string_view) template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { std::string_view str; SIMDJSON_TRY(val.get_string().get(str)); @@ -117725,7 +144803,6 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothr * doc.get>(). */ template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { using value_type = typename std::remove_cvref_t::value_type; static_assert( @@ -117734,9 +144811,13 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { static_assert( std::is_default_constructible_v, "The specified type inside the container must default constructible."); - lasx::ondemand::array arr; - SIMDJSON_TRY(val.get_array().get(arr)); + if constexpr (std::is_same_v, lasx::ondemand::array>) { + arr = val; + } else { + SIMDJSON_TRY(val.get_array().get(arr)); + } + for (auto v : arr) { if constexpr (concepts::returns_reference) { if (auto const err = v.get().get(concepts::emplace_one(out)); @@ -117767,7 +144848,6 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { * string-keyed types. */ template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { using value_type = typename std::remove_cvref_t::mapped_type; static_assert( @@ -117793,7 +144873,45 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { return SUCCESS; } +template +error_code tag_invoke(deserialize_tag, lasx::ondemand::object &obj, T &out) noexcept { + using value_type = typename std::remove_cvref_t::mapped_type; + + out.clear(); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + + lasx::ondemand::value value_obj; + SIMDJSON_TRY(field.value().get(value_obj)); + + value_type this_value; + SIMDJSON_TRY(value_obj.get(this_value)); + out.emplace(typename T::key_type(key), std::move(this_value)); + } + return SUCCESS; +} + +template +error_code tag_invoke(deserialize_tag, lasx::ondemand::value &val, T &out) noexcept { + lasx::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + +template +error_code tag_invoke(deserialize_tag, lasx::ondemand::document &doc, T &out) noexcept { + lasx::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} +template +error_code tag_invoke(deserialize_tag, lasx::ondemand::document_reference &doc, T &out) noexcept { + lasx::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} /** @@ -117811,7 +144929,6 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { * @return status of the conversion */ template - requires(!require_custom_serialization) error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::element_type, ValT>) { using element_type = typename std::remove_cvref_t::element_type; @@ -117836,17 +144953,17 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser /** * This CPO (Customization Point Object) will help deserialize into optional types. */ -template - requires(!require_custom_serialization) -error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::value_type, ValT>) { +template +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept(nothrow_deserializable::value_type, decltype(val)>) { using value_type = typename std::remove_cvref_t::value_type; - static_assert( - deserializable, - "The specified type inside the unique_ptr must itself be deserializable"); - static_assert( - std::is_default_constructible_v, - "The specified type inside the unique_ptr must default constructible."); + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullopt + return SUCCESS; + } if (!out) { out.emplace(); @@ -117855,10 +144972,329 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser return SUCCESS; } + +#if SIMDJSON_STATIC_REFLECTION + + +template +constexpr bool user_defined_type = (std::is_class_v +&& !std::is_same_v && !std::is_same_v && !concepts::optional_type && +!concepts::appendable_containers); + + +template + requires(user_defined_type && std::is_class_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { + lasx::ondemand::object obj; + if constexpr (std::is_same_v, lasx::ondemand::object>) { + obj = val; + } else { + SIMDJSON_TRY(val.get_object().get(obj)); + } + template for (constexpr auto mem : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + if constexpr (concepts::optional_type) { + // for optional members, it's ok if the key is missing + auto error = obj[key].get(out.[:mem:]); + if (error && error != NO_SUCH_FIELD) { + if(error == NO_SUCH_FIELD) { + out.[:mem:].reset(); + continue; + } + return error; + } + } else { + // for non-optional members, the key must be present + SIMDJSON_TRY(obj[key].get(out.[:mem:])); + } + } + }; + return simdjson::SUCCESS; +} + +// Support for enum deserialization - deserialize from string representation using expand approach from P2996R12 +template + requires(std::is_enum_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { +#if SIMDJSON_STATIC_REFLECTION + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + if (str == std::meta::identifier_of(enum_val)) { + out = [:enum_val:]; + return SUCCESS; + } + }; + + return INCORRECT_TYPE; +#else + // Fallback: deserialize as integer if reflection not available + std::underlying_type_t int_val; + SIMDJSON_TRY(val.get(int_val)); + out = static_cast(int_val); + return SUCCESS; +#endif +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::unique_ptr &out) noexcept { + if (!out) { + out = std::make_unique(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::shared_ptr &out) noexcept { + if (!out) { + out = std::make_shared(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +#endif // SIMDJSON_STATIC_REFLECTION + +//////////////////////////////////////// +// Unique pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Shared pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Explicit optional specializations +//////////////////////////////////////// + +//////////////////////////////////////// +// Explicit smart pointer specializations for string and int types +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_shared(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + bool is_null_value; + SIMDJSON_TRY( val.is_null().get(is_null_value) ); + if (is_null_value) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + int64_t temp; + SIMDJSON_TRY(val.get_int64().get(temp)); + *out = static_cast(temp); + return SUCCESS; +} + } // namespace simdjson #endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS /* end file simdjson/generic/ondemand/std_deserialize.h for lasx */ // Inline definitions @@ -117951,7 +145387,7 @@ simdjson_inline simdjson_result array::begin() noexcept { simdjson_inline simdjson_result array::end() noexcept { return array_iterator(iter); } -simdjson_inline error_code array::consume() noexcept { +simdjson_warn_unused simdjson_warn_unused simdjson_inline error_code array::consume() noexcept { auto error = iter.json_iter().skip_child(iter.depth()-1); if(error) { iter.abandon(); } return error; @@ -118036,6 +145472,67 @@ inline simdjson_result array::at_path(std::string_view json_path) noexcep return at_pointer(json_pointer); } +inline simdjson_result> array::at_path_with_wildcard(std::string_view json_path) noexcept { + std::vector result; + + auto result_pair = get_next_key_and_json_path(json_path); + std::string_view key = result_pair.first; + std::string_view remaining_path = result_pair.second; + // Wildcard case + if(key=="*"){ + for(auto element: *this){ + + if(element.error()){ + return element.error(); + } + + if(remaining_path.empty()){ + // Use value_unsafe() because we've already checked for errors above. + // The 'element' is a simdjson_result wrapper, and we need to extract + // the underlying value. value_unsafe() is safe here because error() returned false. + result.push_back(std::move(element).value_unsafe()); + + }else{ + auto nested_result = element.at_path_with_wildcard(remaining_path); + + if(nested_result.error()){ + return nested_result.error(); + } + // Same logic as above. + std::vector nested_matches = std::move(nested_result).value_unsafe(); + + result.insert(result.end(), + std::make_move_iterator(nested_matches.begin()), + std::make_move_iterator(nested_matches.end())); + } + } + return result; + }else{ + // Specific index case in which we access the element at the given index + size_t idx=0; + + for(char c:key){ + if(c < '0' || c > '9'){ + return INVALID_JSON_POINTER; + } + idx = idx*10 + (c - '0'); + } + + auto element = at(idx); + + if(element.error()){ + return element.error(); + } + + if(remaining_path.empty()){ + result.push_back(std::move(element).value_unsafe()); + return result; + }else{ + return element.at_path_with_wildcard(remaining_path); + } + } +} + simdjson_inline simdjson_result array::at(size_t index) noexcept { size_t i = 0; for (auto value : *this) { @@ -118094,6 +145591,10 @@ simdjson_inline simdjson_result simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path_with_wildcard(json_path); +} simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { if (error()) { return error(); } return first.raw_json(); @@ -118142,6 +145643,9 @@ simdjson_inline array_iterator &array_iterator::operator++() noexcept { return *this; } +simdjson_inline bool array_iterator::at_end() const noexcept { + return iter.at_end(); +} } // namespace ondemand } // namespace lasx } // namespace simdjson @@ -118178,7 +145682,9 @@ simdjson_inline simdjson_result &simdjson_result ++(first); return *this; } - +simdjson_inline bool simdjson_result::at_end() const noexcept { + return !first.iter.is_valid() || first.at_end(); +} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H @@ -118235,7 +145741,7 @@ simdjson_inline simdjson_result value::get_string(bool allow_r return iter.get_string(allow_replacement); } template -simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_warn_unused simdjson_inline error_code value::get_string(string_type& receiver, bool allow_replacement) noexcept { return iter.get_string(receiver, allow_replacement); } simdjson_inline simdjson_result value::get_wobbly_string() noexcept { @@ -118277,15 +145783,15 @@ template<> simdjson_inline simdjson_result value::get() noexcept { retu template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } -template<> simdjson_inline error_code value::get(array& out) noexcept { return get_array().get(out); } -template<> simdjson_inline error_code value::get(object& out) noexcept { return get_object().get(out); } -template<> simdjson_inline error_code value::get(raw_json_string& out) noexcept { return get_raw_json_string().get(out); } -template<> simdjson_inline error_code value::get(std::string_view& out) noexcept { return get_string(false).get(out); } -template<> simdjson_inline error_code value::get(number& out) noexcept { return get_number().get(out); } -template<> simdjson_inline error_code value::get(double& out) noexcept { return get_double().get(out); } -template<> simdjson_inline error_code value::get(uint64_t& out) noexcept { return get_uint64().get(out); } -template<> simdjson_inline error_code value::get(int64_t& out) noexcept { return get_int64().get(out); } -template<> simdjson_inline error_code value::get(bool& out) noexcept { return get_bool().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(array& out) noexcept { return get_array().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(object& out) noexcept { return get_object().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(raw_json_string& out) noexcept { return get_raw_json_string().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(std::string_view& out) noexcept { return get_string(false).get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(number& out) noexcept { return get_number().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(double& out) noexcept { return get_double().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(uint64_t& out) noexcept { return get_uint64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(int64_t& out) noexcept { return get_int64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code value::get(bool& out) noexcept { return get_bool().get(out); } #if SIMDJSON_EXCEPTIONS template @@ -118483,6 +145989,19 @@ simdjson_inline simdjson_result value::at_path(std::string_view json_path } } +inline simdjson_result> value::at_path_with_wildcard(std::string_view json_path) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path_with_wildcard(json_path); + case json_type::object: + return (*this).get_object().at_path_with_wildcard(json_path); + default: + return INVALID_JSON_POINTER; + } +} + } // namespace ondemand } // namespace lasx } // namespace simdjson @@ -118733,6 +146252,14 @@ simdjson_inline simdjson_result simdjson_result> simdjson_result::at_path_with_wildcard( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path_with_wildcard(json_path); +} + } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H @@ -118882,7 +146409,7 @@ simdjson_inline simdjson_result document::get_string(bool allo return get_root_value_iterator().get_root_string(true, allow_replacement); } template -simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_warn_unused simdjson_inline error_code document::get_string(string_type& receiver, bool allow_replacement) noexcept { return get_root_value_iterator().get_root_string(receiver, true, allow_replacement); } simdjson_inline simdjson_result document::get_wobbly_string() noexcept { @@ -118908,15 +146435,15 @@ template<> simdjson_inline simdjson_result document::get() & noexcept { template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } -template<> simdjson_inline error_code document::get(array& out) & noexcept { return get_array().get(out); } -template<> simdjson_inline error_code document::get(object& out) & noexcept { return get_object().get(out); } -template<> simdjson_inline error_code document::get(raw_json_string& out) & noexcept { return get_raw_json_string().get(out); } -template<> simdjson_inline error_code document::get(std::string_view& out) & noexcept { return get_string(false).get(out); } -template<> simdjson_inline error_code document::get(double& out) & noexcept { return get_double().get(out); } -template<> simdjson_inline error_code document::get(uint64_t& out) & noexcept { return get_uint64().get(out); } -template<> simdjson_inline error_code document::get(int64_t& out) & noexcept { return get_int64().get(out); } -template<> simdjson_inline error_code document::get(bool& out) & noexcept { return get_bool().get(out); } -template<> simdjson_inline error_code document::get(value& out) & noexcept { return get_value().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(array& out) & noexcept { return get_array().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(object& out) & noexcept { return get_object().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(raw_json_string& out) & noexcept { return get_raw_json_string().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(std::string_view& out) & noexcept { return get_string(false).get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(double& out) & noexcept { return get_double().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(uint64_t& out) & noexcept { return get_uint64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(int64_t& out) & noexcept { return get_int64().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(bool& out) & noexcept { return get_bool().get(out); } +template<> simdjson_warn_unused simdjson_inline error_code document::get(value& out) & noexcept { return get_value().get(out); } template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } template<> simdjson_deprecated simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } @@ -118936,8 +146463,8 @@ simdjson_inline document::operator object() & noexcept(false) { return get_objec simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline document::operator double() noexcept(false) { return get_double(); } -simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } -simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator std::string_view() noexcept(false) simdjson_lifetime_bound { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) simdjson_lifetime_bound { return get_raw_json_string(); } simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } simdjson_inline document::operator value() noexcept(false) { return get_value(); } @@ -118960,1160 +146487,1980 @@ simdjson_inline simdjson_result document::at(size_t index) & noexcept { auto a = get_array(); return a.at(index); } -simdjson_inline simdjson_result document::begin() & noexcept { - return get_array().begin(); +simdjson_inline simdjson_result document::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result document::end() & noexcept { + return {}; +} + +simdjson_inline simdjson_result document::find_field(std::string_view key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field(const char *key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { + return start_or_resume_object()[key]; +} + +simdjson_warn_unused simdjson_inline error_code document::consume() noexcept { + bool scalar = false; + auto error = is_scalar().get(scalar); + if(error) { return error; } + if(scalar) { + iter.return_current_and_advance(); + return SUCCESS; + } + error = iter.skip_child(0); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result document::raw_json() noexcept { + auto _iter = get_root_value_iterator(); + const uint8_t * starting_point{_iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter.unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result document::type() noexcept { + return get_root_value_iterator().type(); +} + +simdjson_inline simdjson_result document::is_scalar() noexcept { + // For more speed, we could do: + // return iter.is_single_token(); + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline simdjson_result document::is_string() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return (this_type == json_type::string); +} + +simdjson_inline bool document::is_negative() noexcept { + return get_root_value_iterator().is_root_negative(); +} + +simdjson_inline simdjson_result document::is_integer() noexcept { + return get_root_value_iterator().is_root_integer(true); +} + +simdjson_inline simdjson_result document::get_number_type() noexcept { + return get_root_value_iterator().get_root_number_type(true); +} + +simdjson_inline simdjson_result document::get_number() noexcept { + return get_root_value_iterator().get_root_number(true); +} + + +simdjson_inline simdjson_result document::raw_json_token() noexcept { + auto _iter = get_root_value_iterator(); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_root_length()); +} + +simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_pointer.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result document::at_path(std::string_view json_path) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_path.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path(json_path); + case json_type::object: + return (*this).get_object().at_path(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +simdjson_inline simdjson_result> document::at_path_with_wildcard(std::string_view json_path) noexcept { + rewind(); // Rewind the document each time at_path_with_wildcard is called + if (json_path.empty()) { + return INVALID_JSON_POINTER; + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) { + case json_type::array: + return (*this).get_array().at_path_with_wildcard(json_path); + case json_type::object: + return (*this).get_object().at_path_with_wildcard(json_path); + default: + return INVALID_JSON_POINTER; + } +} + +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code document::extract_into(T& out) & noexcept { + // Helper to check if a field name matches any of the requested fields + auto should_extract = [](std::string_view field_name) constexpr -> bool { + return ((FieldNames.view() == field_name) || ...); + }; + + // Iterate through all members of T using reflection + template for (constexpr auto mem : std::define_static_array( + std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + + // Only extract this field if it's in our list of requested fields + if constexpr (should_extract(key)) { + // Try to find and extract the field + if constexpr (concepts::optional_type) { + // For optional fields, it's ok if they're missing + auto field_result = find_field_unordered(key); + if (!field_result.error()) { + auto error = field_result.get(out.[:mem:]); + if (error && error != NO_SUCH_FIELD) { + return error; + } + } else if (field_result.error() != NO_SUCH_FIELD) { + return field_result.error(); + } else { + out.[:mem:].reset(); + } + } else { + // For required fields (in the requested list), fail if missing + SIMDJSON_TRY((*this)[key].get(out.[:mem:])); + } + } + } + }; + + return SUCCESS; +} + +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + lasx::ondemand::document &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base( + error + ) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(receiver, allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first); +} +template<> simdjson_warn_unused simdjson_inline error_code simdjson_result::get(lasx::ondemand::document &out) & noexcept = delete; +template<> simdjson_warn_unused simdjson_inline error_code simdjson_result::get(lasx::ondemand::document &out) && noexcept { + if (error()) { return error(); } + out = std::forward(first); + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} + +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} + +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} + +simdjson_inline bool simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} + +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} + +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} + +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} + + +#if SIMDJSON_EXCEPTIONS +template ::value == false>::type> +simdjson_inline simdjson_result::operator T() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lasx::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } -simdjson_inline simdjson_result document::end() & noexcept { - return {}; +simdjson_inline simdjson_result::operator lasx::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } - -simdjson_inline simdjson_result document::find_field(std::string_view key) & noexcept { - return start_or_resume_object().find_field(key); +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } -simdjson_inline simdjson_result document::find_field(const char *key) & noexcept { - return start_or_resume_object().find_field(key); +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } -simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { - return start_or_resume_object().find_field_unordered(key); +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } -simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { - return start_or_resume_object().find_field_unordered(key); +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } -simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { - return start_or_resume_object()[key]; +simdjson_inline simdjson_result::operator lasx::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } -simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { - return start_or_resume_object()[key]; +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } - -simdjson_inline error_code document::consume() noexcept { - bool scalar = false; - auto error = is_scalar().get(scalar); - if(error) { return error; } - if(scalar) { - iter.return_current_and_advance(); - return SUCCESS; - } - error = iter.skip_child(0); - if(error) { iter.abandon(); } - return error; +simdjson_inline simdjson_result::operator lasx::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; } +#endif -simdjson_inline simdjson_result document::raw_json() noexcept { - auto _iter = get_root_value_iterator(); - const uint8_t * starting_point{_iter.peek_start()}; - auto error = consume(); - if(error) { return error; } - // After 'consume()', we could be left pointing just beyond the document, but that - // is ok because we are not going to dereference the final pointer position, we just - // use it to compute the length in bytes. - const uint8_t * final_point{iter.unsafe_pointer()}; - return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); } -simdjson_inline simdjson_result document::type() noexcept { - return get_root_value_iterator().type(); +simdjson_inline bool simdjson_result::at_end() const noexcept { + if (error()) { return error(); } + return first.at_end(); } -simdjson_inline simdjson_result document::is_scalar() noexcept { - // For more speed, we could do: - // return iter.is_single_token(); - json_type this_type; - auto error = type().get(this_type); - if(error) { return error; } - return ! ((this_type == json_type::array) || (this_type == json_type::object)); + +simdjson_inline int32_t simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); } -simdjson_inline simdjson_result document::is_string() noexcept { - json_type this_type; - auto error = type().get(this_type); - if(error) { return error; } - return (this_type == json_type::string); +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); } -simdjson_inline bool document::is_negative() noexcept { - return get_root_value_iterator().is_root_negative(); +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); } -simdjson_inline simdjson_result document::is_integer() noexcept { - return get_root_value_iterator().is_root_integer(true); +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path(json_path); } -simdjson_inline simdjson_result document::get_number_type() noexcept { - return get_root_value_iterator().get_root_number_type(true); +simdjson_inline simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path_with_wildcard(json_path); } -simdjson_inline simdjson_result document::get_number() noexcept { - return get_root_value_iterator().get_root_number(true); +#if SIMDJSON_STATIC_REFLECTION +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code simdjson_result::extract_into(T& out) & noexcept { + if (error()) { return error(); } + return first.extract_into(out); } +#endif // SIMDJSON_STATIC_REFLECTION +} // namespace simdjson -simdjson_inline simdjson_result document::raw_json_token() noexcept { - auto _iter = get_root_value_iterator(); - return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_root_length()); -} -simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { - rewind(); // Rewind the document each time at_pointer is called - if (json_pointer.empty()) { - return this->get_value(); - } - json_type t; - SIMDJSON_TRY(type().get(t)); - switch (t) - { - case json_type::array: - return (*this).get_array().at_pointer(json_pointer); - case json_type::object: - return (*this).get_object().at_pointer(json_pointer); - default: - return INVALID_JSON_POINTER; - } -} +namespace simdjson { +namespace lasx { +namespace ondemand { -simdjson_inline simdjson_result document::at_path(std::string_view json_path) noexcept { - rewind(); // Rewind the document each time at_pointer is called - if (json_path.empty()) { - return this->get_value(); - } - json_type t; - SIMDJSON_TRY(type().get(t)); - switch (t) { - case json_type::array: - return (*this).get_array().at_path(json_path); - case json_type::object: - return (*this).get_object().at_path(json_path); - default: - return INVALID_JSON_POINTER; - } +simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} +simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} +simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } +simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } +simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } +/** + * The document_reference instances are used primarily/solely for streams of JSON + * documents. + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. + * + * However, for streams of JSON documents, we want to be able to start from + * "321" "321" "321" + * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() + * successfully each time. + * + * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: + * this indicates that we allow trailing content. + */ +simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } +simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } +simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } +template +simdjson_warn_unused simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } +simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } +simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } +simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } +simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } +simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_value(); } +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline document_reference::operator T() noexcept(false) { return get(); } +simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } +simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } +simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } +simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } +simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } +#endif +simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } +simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } +simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } +simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } +simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } +simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } +simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_inline simdjson_result document_reference::is_string() noexcept { return doc->is_string(); } +simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } +simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } +simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } +simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } +simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } +simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } +simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } +simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_inline simdjson_result document_reference::at_path(std::string_view json_path) noexcept { return doc->at_path(json_path); } +simdjson_inline simdjson_result> document_reference::at_path_with_wildcard(std::string_view json_path) noexcept { return doc->at_path_with_wildcard(json_path); } +simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} +simdjson_inline document_reference::operator document&() const noexcept { return *doc; } +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code document_reference::extract_into(T& out) & noexcept { + return doc->extract_into(out); } - +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION } // namespace ondemand } // namespace lasx } // namespace simdjson + + namespace simdjson { +simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::document_reference value, error_code error) + noexcept : implementation_simdjson_result_base(std::forward(value), error) {} -simdjson_inline simdjson_result::simdjson_result( - lasx::ondemand::document &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ -} -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base( - error - ) -{ -} -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } -simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { if (error()) { return error(); } return first.at(index); } -simdjson_inline error_code simdjson_result::rewind() noexcept { +simdjson_inline error_code simdjson_result::rewind() noexcept { if (error()) { return error(); } first.rewind(); return SUCCESS; } -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_inline simdjson_result simdjson_result::end() & noexcept { +simdjson_inline simdjson_result simdjson_result::end() & noexcept { return {}; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { if (error()) { return error(); } return first.get_array(); } -simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { if (error()) { return error(); } return first.get_object(); } -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(allow_replacement); } template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { +simdjson_warn_unused simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { if (error()) { return error(); } return first.get_string(receiver, allow_replacement); } -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { if (error()) { return error(); } return first.get_wobbly_string(); } -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } -simdjson_inline simdjson_result simdjson_result::get_value() noexcept { +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { if (error()) { return error(); } return first.get_value(); } -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_string() noexcept { + if (error()) { return error(); } + return first.is_string(); +} +template <> +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(lasx::ondemand::document_reference &out) & noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} +template <> +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(lasx::ondemand::document_reference &out) && noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +template +simdjson_inline simdjson_result::operator T() noexcept(false) { + static_assert(std::is_same::value == false, "You should not call get when T is a document"); + static_assert(std::is_same::value == false, "You should not call get when T is a document"); + if (error()) { throw simdjson_error(error()); } + return first.get(); +} +simdjson_inline simdjson_result::operator lasx::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lasx::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lasx::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator lasx::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); +} +simdjson_inline simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path_with_wildcard(json_path); +} +#if SIMDJSON_STATIC_REFLECTION +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code simdjson_result::extract_into(T& out) & noexcept { + if (error()) { return error(); } + return first.extract_into(out); +} +#endif // SIMDJSON_STATIC_REFLECTION +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H +/* end file simdjson/generic/ondemand/document-inl.h for lasx */ +/* including simdjson/generic/ondemand/document_stream-inl.h for lasx: #include "simdjson/generic/ondemand/document_stream-inl.h" */ +/* begin file simdjson/generic/ondemand/document_stream-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace lasx { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. + std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); +} + +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); +} + +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). + if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); +} + + +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. + can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } +} -template -simdjson_inline simdjson_result simdjson_result::get() & noexcept { - if (error()) { return error(); } - return first.get(); -} -template -simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first).get(); -} -template -simdjson_inline error_code simdjson_result::get(T &out) & noexcept { - if (error()) { return error(); } - return first.get(out); +inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); } -template -simdjson_inline error_code simdjson_result::get(T &out) && noexcept { - if (error()) { return error(); } - return std::forward(first).get(out); + +#endif // SIMDJSON_THREADS_ENABLED + +simdjson_inline document_stream::document_stream( + ondemand::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size, + bool _allow_comma_separated +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? MINIMAL_BATCH_SIZE : _batch_size}, + allow_comma_separated{_allow_comma_separated}, + error{SUCCESS} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change + #endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif } -template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; -template<> simdjson_deprecated simdjson_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first); +simdjson_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + allow_comma_separated{false}, + error{UNINITIALIZED} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) + #endif +{ } -template<> simdjson_inline error_code simdjson_result::get(lasx::ondemand::document &out) & noexcept = delete; -template<> simdjson_inline error_code simdjson_result::get(lasx::ondemand::document &out) && noexcept { - if (error()) { return error(); } - out = std::forward(first); - return SUCCESS; + +simdjson_inline document_stream::~document_stream() noexcept +{ + #ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); + #endif } -simdjson_inline simdjson_result simdjson_result::type() noexcept { - if (error()) { return error(); } - return first.type(); +inline size_t document_stream::size_in_bytes() const noexcept { + return len; } -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { - if (error()) { return error(); } - return first.is_scalar(); +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; } -simdjson_inline simdjson_result simdjson_result::is_string() noexcept { - if (error()) { return error(); } - return first.is_string(); +simdjson_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { } -simdjson_inline bool simdjson_result::is_negative() noexcept { - if (error()) { return error(); } - return first.is_negative(); +simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { } -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { - if (error()) { return error(); } - return first.is_integer(); +simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { + return simdjson_result(stream->doc, stream->error); } -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { - if (error()) { return error(); } - return first.get_number_type(); +simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) + // + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. + // + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->error() is guarded against error conditions + // (it will immediately return if stream->error casts to false). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. + return *this; } -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { - if (error()) { return error(); } - return first.get_number(); +simdjson_inline bool document_stream::iterator::at_end() const noexcept { + return finished; } -#if SIMDJSON_EXCEPTIONS -template ::value == false>::type> -simdjson_inline simdjson_result::operator T() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator lasx::ondemand::array() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator lasx::ondemand::object() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_inline simdjson_result::operator double() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; } -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline bool document_stream::iterator::operator==(const document_stream::iterator &other) const noexcept { + return finished == other.finished; } -simdjson_inline simdjson_result::operator lasx::ondemand::raw_json_string() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished. + return iterator(this, error == EMPTY); } -simdjson_inline simdjson_result::operator bool() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); } -simdjson_inline simdjson_result::operator lasx::ondemand::value() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->allocate(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } + doc_index = batch_start; + doc = document(json_iterator(&buf[batch_start], parser)); + doc.iter._streaming = true; + + #ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread on next batch if needed + error = stage1_thread_parser.allocate(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } + } + #endif // SIMDJSON_THREADS_ENABLED } -#endif +inline void document_stream::next() noexcept { + // We always enter at once once in an error condition. + if (error) { return; } + next_document(); + if (error) { return; } + auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); + doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { - if (error()) { return error(); } - return first.current_location(); + // Check if at end of structural indexes (i.e. at end of batch) + if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { + error = EMPTY; + // Load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + #ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } + #else + error = run_stage1(*parser, batch_start); + #endif + /** + * Whenever we move to another window, we need to update all pointers to make + * it appear as if the input buffer started at the beginning of the window. + * + * Take this input: + * + * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] + * + * Say you process the following window... + * + * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' + * + * When you do so, the json_iterator has a pointer at the beginning of the memory region + * (pointing at the beginning of '{"z"...'. + * + * When you move to the window that starts at... + * + * '[7, 10, 9] [15, 11, 12, 13] ... + * + * then it is not sufficient to just run stage 1. You also need to re-anchor the + * json_iterator so that it believes we are starting at '[7, 10, 9]...'. + * + * Under the DOM front-end, this gets done automatically because the parser owns + * the pointer the data, and when you call stage1 and then stage2 on the same + * parser, then stage2 will run on the pointer acquired by stage1. + * + * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that + * we used. But json_iterator has no callback when stage1 is called on the parser. + * In fact, I think that the parser is unaware of json_iterator. + * + * + * So we need to re-anchor the json_iterator after each call to stage 1 so that + * all of the pointers are in sync. + */ + doc.iter = json_iterator(&buf[batch_start], parser); + doc.iter._streaming = true; + /** + * End of resync. + */ + + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. + doc_index = batch_start; + } + } } -simdjson_inline bool simdjson_result::at_end() const noexcept { - if (error()) { return error(); } - return first.at_end(); +inline void document_stream::next_document() noexcept { + // Go to next place where depth=0 (document depth) + error = doc.iter.skip_child(0); + if (error) { return; } + // Always set depth=1 at the start of document + doc.iter._depth = 1; + // consume comma if comma separated is allowed + if (allow_comma_separated) { + error_code ignored = doc.iter.consume_character(','); + static_cast(ignored); // ignored on purpose + } + // Resets the string buffer at the beginning, thus invalidating the strings. + doc.iter._string_buf_loc = parser->string_buf.get(); + doc.iter._root = doc.iter.position(); } +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; +} -simdjson_inline int32_t simdjson_result::current_depth() const noexcept { - if (error()) { return error(); } - return first.current_depth(); +inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { + // This code only updates the structural index in the parser, it does not update any json_iterator + // instance. + size_t remaining = len - _batch_start; + if (remaining <= batch_size) { + return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); + } else { + return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); + } } -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { - if (error()) { return error(); } - return first.raw_json_token(); +simdjson_inline size_t document_stream::iterator::current_index() const noexcept { + return stream->doc_index; } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); +simdjson_inline std::string_view document_stream::iterator::source() const noexcept { + auto depth = stream->doc.iter.depth(); + auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); + + // If at root, process the first token to determine if scalar value + if (stream->doc.iter.at_root()) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': // Depth=1 already at start of document + break; + case '}': case ']': + depth--; + break; + default: // Scalar value document + // TODO: We could remove trailing whitespaces + // This returns a string spanning from start of value to the beginning of the next document (excluded) + { + auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; + // normally the length would be next_index - current_index() - 1, except for the last document + size_t svlen = next_index - current_index(); + const char *start = reinterpret_cast(stream->buf) + current_index(); + while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { + svlen--; + } + return std::string_view(start, svlen); + } + } + cur_struct_index++; + } + + while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': + depth++; + break; + case '}': case ']': + depth--; + break; + } + if (depth == 0) { break; } + cur_struct_index++; + } + + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; } -simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { - if (error()) { return error(); } - return first.at_path(json_path); +inline error_code document_stream::iterator::error() const noexcept { + return stream->error; } -} // namespace simdjson +#ifdef SIMDJSON_THREADS_ENABLED +inline void document_stream::load_from_stage1_thread() noexcept { + worker->finish(); + // Swap to the parser that was loaded up in the thread. Make sure the parser has + // enough memory to swap to, as well. + std::swap(stage1_thread_parser,*parser); + error = stage1_thread_error; + if (error) { return; } -namespace simdjson { -namespace lasx { -namespace ondemand { + // If there's anything left, start the stage 1 thread! + if (next_batch_start() < len) { + start_stage1_thread(); + } +} -simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} -simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} -simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } -simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } -simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } -/** - * The document_reference instances are used primarily/solely for streams of JSON - * documents. - * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should - * give an error, so we check for trailing content. - * - * However, for streams of JSON documents, we want to be able to start from - * "321" "321" "321" - * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() - * successfully each time. - * - * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: - * this indicates that we allow trailing content. - */ -simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } -simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } -simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } -simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } -simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } -simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } -simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } -template -simdjson_inline error_code document_reference::get_string(string_type& receiver, bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(receiver, false, allow_replacement); } -simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } -simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } -simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } -simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } -simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_array(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_object(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_raw_json_string(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_string(false); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_double(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_uint64(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_int64(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_bool(); } -template<> simdjson_inline simdjson_result document_reference::get() & noexcept { return get_value(); } -#if SIMDJSON_EXCEPTIONS -template -simdjson_inline document_reference::operator T() noexcept(false) { return get(); } -simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } -simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } -simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } -simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } -simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } -simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } -simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } -simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } -simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } -#endif -simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } -simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } -simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } -simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } -simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } -simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } -simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } -simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } -simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } -simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } -simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } -simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } -simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } -simdjson_inline simdjson_result document_reference::is_string() noexcept { return doc->is_string(); } -simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } -simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } -simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } -simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } -simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } -simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } -simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } -simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } -simdjson_inline simdjson_result document_reference::at_path(std::string_view json_path) noexcept { return doc->at_path(json_path); } -simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} -simdjson_inline document_reference::operator document&() const noexcept { return *doc; } +inline void document_stream::start_stage1_thread() noexcept { + // we call the thread on a lambda that will update + // this->stage1_thread_error + // there is only one thread that may write to this value + // TODO this is NOT exception-safe. + this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); + + worker->run(this, & this->stage1_thread_parser, _next_batch_start); +} + +#endif // SIMDJSON_THREADS_ENABLED } // namespace ondemand } // namespace lasx } // namespace simdjson - - namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::document_reference value, error_code error) - noexcept : implementation_simdjson_result_base(std::forward(value), error) {} - -simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { - if (error()) { return error(); } - return first.count_elements(); -} -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { - if (error()) { return error(); } - return first.count_fields(); -} -simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { - if (error()) { return error(); } - return first.at(index); -} -simdjson_inline error_code simdjson_result::rewind() noexcept { - if (error()) { return error(); } - first.rewind(); - return SUCCESS; -} -simdjson_inline simdjson_result simdjson_result::begin() & noexcept { - if (error()) { return error(); } - return first.begin(); +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ } -simdjson_inline simdjson_result simdjson_result::end() & noexcept { - return {}; +simdjson_inline simdjson_result::simdjson_result( + lasx::ondemand::document_stream &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); + } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H +/* end file simdjson/generic/ondemand/document_stream-inl.h for lasx */ +/* including simdjson/generic/ondemand/field-inl.h for lasx: #include "simdjson/generic/ondemand/field-inl.h" */ +/* begin file simdjson/generic/ondemand/field-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace ondemand { + +// clang 6 does not think the default constructor can be noexcept, so we make it explicit +simdjson_inline field::field() noexcept : std::pair() {} + +simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept + : std::pair(key, std::forward(value)) +{ } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { - if (error()) { return error(); } - return first[key]; + +simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { + raw_json_string key; + SIMDJSON_TRY( parent_iter.field_key().get(key) ); + SIMDJSON_TRY( parent_iter.field_value() ); + return field::start(parent_iter, key); } -simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { - if (error()) { return error(); } - return first[key]; + +simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { + return field(key, parent_iter.child()); } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); + +simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); + first.consume(); + return answer; } -simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); + +template +simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { + std::string_view key; + SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); + receiver = key; + return SUCCESS; } -simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { - if (error()) { return error(); } - return first.get_array(); + +simdjson_inline raw_json_string field::key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return first; } -simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { - if (error()) { return error(); } - return first.get_object(); + + +simdjson_inline std::string_view field::key_raw_json_token() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return std::string_view(reinterpret_cast(first.buf-1), second.iter._json_iter->token.peek(-1) - first.buf + 1); } -simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { - if (error()) { return error(); } - return first.get_uint64(); + +simdjson_inline std::string_view field::escaped_key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + auto end_quote = second.iter._json_iter->token.peek(-1); + while(*end_quote != '"') end_quote--; + return std::string_view(reinterpret_cast(first.buf), end_quote - first.buf); } -simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { - if (error()) { return error(); } - return first.get_uint64_in_string(); + +simdjson_inline value &field::value() & noexcept { + return second; } -simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { - if (error()) { return error(); } - return first.get_int64(); + +simdjson_inline value field::value() && noexcept { + return std::forward(*this).second; } -simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { - if (error()) { return error(); } - return first.get_int64_in_string(); + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + lasx::ondemand::field &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ } -simdjson_inline simdjson_result simdjson_result::get_double() noexcept { - if (error()) { return error(); } - return first.get_double(); +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ } -simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + +simdjson_inline simdjson_result simdjson_result::key() noexcept { if (error()) { return error(); } - return first.get_double_in_string(); + return first.key(); } -simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + +simdjson_inline simdjson_result simdjson_result::key_raw_json_token() noexcept { if (error()) { return error(); } - return first.get_string(allow_replacement); + return first.key_raw_json_token(); } -template -simdjson_inline error_code simdjson_result::get_string(string_type& receiver, bool allow_replacement) noexcept { + +simdjson_inline simdjson_result simdjson_result::escaped_key() noexcept { if (error()) { return error(); } - return first.get_string(receiver, allow_replacement); + return first.escaped_key(); } -simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + +simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { if (error()) { return error(); } - return first.get_wobbly_string(); + return first.unescaped_key(allow_replacement); } -simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { if (error()) { return error(); } - return first.get_raw_json_string(); + return first.unescaped_key(receiver, allow_replacement); } -simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + +simdjson_inline simdjson_result simdjson_result::value() noexcept { if (error()) { return error(); } - return first.get_bool(); + return std::move(first.value()); } -simdjson_inline simdjson_result simdjson_result::get_value() noexcept { - if (error()) { return error(); } - return first.get_value(); + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +/* end file simdjson/generic/ondemand/field-inl.h for lasx */ +/* including simdjson/generic/ondemand/json_iterator-inl.h for lasx: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/json_iterator-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace ondemand { + +simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept + : token(std::forward(other.token)), + parser{other.parser}, + _string_buf_loc{other._string_buf_loc}, + error{other.error}, + _depth{other._depth}, + _root{other._root}, + _streaming{other._streaming} +{ + other.parser = nullptr; } -simdjson_inline simdjson_result simdjson_result::is_null() noexcept { - if (error()) { return error(); } - return first.is_null(); +simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { + token = other.token; + parser = other.parser; + _string_buf_loc = other._string_buf_loc; + error = other.error; + _depth = other._depth; + _root = other._root; + _streaming = other._streaming; + other.parser = nullptr; + return *this; } -template -simdjson_inline simdjson_result simdjson_result::get() & noexcept { - if (error()) { return error(); } - return first.get(); + +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{false} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif } -template -simdjson_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first).get(); + +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{streaming} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif } -template -simdjson_inline error_code simdjson_result::get(T &out) & noexcept { - if (error()) { return error(); } - return first.get(out); +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON + +inline void json_iterator::rewind() noexcept { + token.set_position( root_position() ); + logger::log_headers(); // We start again + _string_buf_loc = parser->string_buf.get(); + _depth = 1; } -template -simdjson_inline error_code simdjson_result::get(T &out) && noexcept { - if (error()) { return error(); } - return std::forward(first).get(out); + +inline bool json_iterator::balanced() const noexcept { + token_iterator ti(token); + int32_t count{0}; + ti.set_position( root_position() ); + while(ti.peek() <= peek_last()) { + switch (*ti.return_current_and_advance()) + { + case '[': case '{': + count++; + break; + case ']': case '}': + count--; + break; + default: + break; + } + } + return count == 0; } -simdjson_inline simdjson_result simdjson_result::type() noexcept { - if (error()) { return error(); } - return first.type(); + + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and parent_depth, which is a desired effect. The warning does not show up if the +// skip_child() function is not marked inline). +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { + if (depth() <= parent_depth) { return SUCCESS; } + switch (*return_current_and_advance()) { + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + + // For the first open array/object in a value, we've already incremented depth, so keep it the same + // We never stop at colon, but if we did, it wouldn't affect depth + case '[': case '{': case ':': + logger::log_start_value(*this, "skip"); + break; + // If there is a comma, we have just finished a value in an array/object, and need to get back in + case ',': + logger::log_value(*this, "skip"); + break; + // ] or } means we just finished a value and need to jump out of the array/object + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } +#if SIMDJSON_CHECK_EOF + // If there are no more tokens, the parent is incomplete. + if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + break; + case '"': + if(*peek() == ':') { + // We are at a key!!! + // This might happen if you just started an object and you skip it immediately. + // Performance note: it would be nice to get rid of this check as it is somewhat + // expensive. + // https://github.com/simdjson/simdjson/issues/1742 + logger::log_value(*this, "key"); + return_current_and_advance(); // eat up the ':' + break; // important!!! + } + simdjson_fallthrough; + // Anything else must be a scalar value + default: + // For the first scalar, we will have incremented depth already, so we decrement it here. + logger::log_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + } + + // Now that we've considered the first value, we only increment/decrement for arrays/objects + while (position() < end_position()) { + switch (*return_current_and_advance()) { + case '[': case '{': + logger::log_start_value(*this, "skip"); + _depth++; + break; + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + default: + logger::log_value(*this, "skip", ""); + break; + } + } + + return report_error(TAPE_ERROR, "not enough close braces"); } -simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { - if (error()) { return error(); } - return first.is_scalar(); + +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool json_iterator::at_root() const noexcept { + return position() == root_position(); } -simdjson_inline simdjson_result simdjson_result::is_string() noexcept { - if (error()) { return error(); } - return first.is_string(); + +simdjson_inline bool json_iterator::is_single_token() const noexcept { + return parser->implementation->n_structural_indexes == 1; } -template <> -simdjson_inline error_code simdjson_result::get(lasx::ondemand::document_reference &out) & noexcept { - if (error()) { return error(); } - out = first; - return SUCCESS; + +simdjson_inline bool json_iterator::streaming() const noexcept { + return _streaming; } -template <> -simdjson_inline error_code simdjson_result::get(lasx::ondemand::document_reference &out) && noexcept { - if (error()) { return error(); } - out = first; - return SUCCESS; + +simdjson_inline token_position json_iterator::root_position() const noexcept { + return _root; } -simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { - if (error()) { return error(); } - return first.is_negative(); + +simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); } -simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { - if (error()) { return error(); } - return first.is_integer(); + +simdjson_inline void json_iterator::assert_at_root() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument + // has side effects that will be discarded. + SIMDJSON_ASSUME( token.position() == _root ); +#endif } -simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { - if (error()) { return error(); } - return first.get_number_type(); + +simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { + assert_valid_position(token._position + required_tokens - 1); } -simdjson_inline simdjson_result simdjson_result::get_number() noexcept { - if (error()) { return error(); } - return first.get_number(); + +simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); + SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#else + (void)position; // Suppress unused parameter warning +#endif } -#if SIMDJSON_EXCEPTIONS -template -simdjson_inline simdjson_result::operator T() noexcept(false) { - static_assert(std::is_same::value == false, "You should not call get when T is a document"); - static_assert(std::is_same::value == false, "You should not call get when T is a document"); - if (error()) { throw simdjson_error(error()); } - return first.get(); + +simdjson_inline bool json_iterator::at_end() const noexcept { + return position() == end_position(); } -simdjson_inline simdjson_result::operator lasx::ondemand::array() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; +simdjson_inline token_position json_iterator::end_position() const noexcept { + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + return &parser->implementation->structural_indexes[n_structural_indexes]; } -simdjson_inline simdjson_result::operator lasx::ondemand::object() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +inline std::string json_iterator::to_string() const noexcept { + if( !is_alive() ) { return "dead json_iterator instance"; } + const char * current_structural = reinterpret_cast(token.peek()); + return std::string("json_iterator [ depth : ") + std::to_string(_depth) + + std::string(", structural : '") + std::string(current_structural,1) + + std::string("', offset : ") + std::to_string(token.current_offset()) + + std::string("', error : ") + error_message(error) + + std::string(" ]"); } -simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +inline simdjson_result json_iterator::current_location() const noexcept { + if (!is_alive()) { // Unrecoverable error + if (!at_root()) { + return reinterpret_cast(token.peek(-1)); + } else { + return reinterpret_cast(token.peek()); + } + } + if (at_end()) { + return OUT_OF_BOUNDS; + } + return reinterpret_cast(token.peek()); } -simdjson_inline simdjson_result::operator int64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline bool json_iterator::is_alive() const noexcept { + return parser; } -simdjson_inline simdjson_result::operator double() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline void json_iterator::abandon() noexcept { + parser = nullptr; + _depth = 0; } -simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif // SIMDJSON_CHECK_EOF + return token.return_current_and_advance(); } -simdjson_inline simdjson_result::operator lasx::ondemand::raw_json_string() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { + // deliberately done without safety guard: + return token.peek(); } -simdjson_inline simdjson_result::operator bool() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // SIMDJSON_CHECK_EOF + return token.peek(delta); } -simdjson_inline simdjson_result::operator lasx::ondemand::value() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; + +simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // #if SIMDJSON_CHECK_EOF + return token.peek_length(delta); } -#endif -simdjson_inline simdjson_result simdjson_result::current_location() noexcept { - if (error()) { return error(); } - return first.current_location(); +simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { + // todo: currently we require end-of-string buffering, but the following + // assert_valid_position should be turned on if/when we lift that condition. + // assert_valid_position(position); + // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF + // is ON by default, we have no choice but to disable it for real with a comment. + return token.peek(position); } -simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { - if (error()) { return error(); } - return first.raw_json_token(); +simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_length(position); +} +simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_root_length(position); } -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); +simdjson_inline token_position json_iterator::last_position() const noexcept { + // The following line fails under some compilers... + // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); + // since it has side-effects. + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + SIMDJSON_ASSUME(n_structural_indexes > 0); + return &parser->implementation->structural_indexes[n_structural_indexes - 1]; +} +simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { + return token.peek(last_position()); } -simdjson_inline simdjson_result simdjson_result::at_path(std::string_view json_path) noexcept { - if (error()) { - return error(); - } - return first.at_path(json_path); +simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { + SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); + SIMDJSON_ASSUME(_depth == parent_depth + 1); + _depth = parent_depth; } -} // namespace simdjson +simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); + _depth = child_depth; +} -#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H -/* end file simdjson/generic/ondemand/document-inl.h for lasx */ -/* including simdjson/generic/ondemand/document_stream-inl.h for lasx: #include "simdjson/generic/ondemand/document_stream-inl.h" */ -/* begin file simdjson/generic/ondemand/document_stream-inl.h for lasx */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H +simdjson_inline depth_t json_iterator::depth() const noexcept { + return _depth; +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { + return _string_buf_loc; +} -#include -#include +simdjson_warn_unused simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); + logger::log_error(*this, message); + error = _error; + return error; +} -namespace simdjson { -namespace lasx { -namespace ondemand { +simdjson_inline token_position json_iterator::position() const noexcept { + return token.position(); +} -#ifdef SIMDJSON_THREADS_ENABLED +simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape(in, _string_buf_loc, allow_replacement); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else + return parser->unescape(in, _string_buf_loc, allow_replacement); +#endif +} -inline void stage1_worker::finish() { - // After calling "run" someone would call finish() to wait - // for the end of the processing. - // This function will wait until either the thread has done - // the processing or, else, the destructor has been called. - std::unique_lock lock(locking_mutex); - cond_var.wait(lock, [this]{return has_work == false;}); +simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape_wobbly(in, _string_buf_loc); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else + return parser->unescape_wobbly(in, _string_buf_loc); +#endif } -inline stage1_worker::~stage1_worker() { - // The thread may never outlive the stage1_worker instance - // and will always be stopped/joined before the stage1_worker - // instance is gone. - stop_thread(); +simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); +#if SIMDJSON_DEVELOPMENT_CHECKS +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); + SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); +#endif +#endif + token.set_position(position); + _depth = child_depth; } -inline void stage1_worker::start_thread() { - std::unique_lock lock(locking_mutex); - if(thread.joinable()) { - return; // This should never happen but we never want to create more than one thread. +simdjson_warn_unused simdjson_inline error_code json_iterator::consume_character(char c) noexcept { + if (*peek() == c) { + return_current_and_advance(); + return SUCCESS; } - thread = std::thread([this]{ - while(true) { - std::unique_lock thread_lock(locking_mutex); - // We wait for either "run" or "stop_thread" to be called. - cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); - // If, for some reason, the stop_thread() method was called (i.e., the - // destructor of stage1_worker is called, then we want to immediately destroy - // the thread (and not do any more processing). - if(!can_work) { - break; - } - this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, - this->_next_batch_start); - this->has_work = false; - // The condition variable call should be moved after thread_lock.unlock() for performance - // reasons but thread sanitizers may report it as a data race if we do. - // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock - cond_var.notify_one(); // will notify "finish" - thread_lock.unlock(); - } - } - ); + return TAPE_ERROR; } +#if SIMDJSON_DEVELOPMENT_CHECKS -inline void stage1_worker::stop_thread() { - std::unique_lock lock(locking_mutex); - // We have to make sure that all locks can be released. - can_work = false; - has_work = false; - cond_var.notify_all(); - lock.unlock(); - if(thread.joinable()) { - thread.join(); - } +simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; } -inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { - std::unique_lock lock(locking_mutex); - owner = ds; - _next_batch_start = next_batch_start; - stage1_thread_parser = stage1; - has_work = true; - // The condition variable call should be moved after thread_lock.unlock() for performance - // reasons but thread sanitizers may report it as a data race if we do. - // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock - cond_var.notify_one(); // will notify the thread lock that we have work - lock.unlock(); +simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } } -#endif // SIMDJSON_THREADS_ENABLED - -simdjson_inline document_stream::document_stream( - ondemand::parser &_parser, - const uint8_t *_buf, - size_t _len, - size_t _batch_size, - bool _allow_comma_separated -) noexcept - : parser{&_parser}, - buf{_buf}, - len{_len}, - batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? MINIMAL_BATCH_SIZE : _batch_size}, - allow_comma_separated{_allow_comma_separated}, - error{SUCCESS} - #ifdef SIMDJSON_THREADS_ENABLED - , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change - #endif -{ -#ifdef SIMDJSON_THREADS_ENABLED - if(worker.get() == nullptr) { - error = MEMALLOC; - } #endif -} -simdjson_inline document_stream::document_stream() noexcept - : parser{nullptr}, - buf{nullptr}, - len{0}, - batch_size{0}, - allow_comma_separated{false}, - error{UNINITIALIZED} - #ifdef SIMDJSON_THREADS_ENABLED - , use_thread(false) - #endif -{ -} -simdjson_inline document_stream::~document_stream() noexcept -{ - #ifdef SIMDJSON_THREADS_ENABLED - worker.reset(); - #endif +simdjson_warn_unused simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); + logger::log_error(*this, message); + return _error; } -inline size_t document_stream::size_in_bytes() const noexcept { - return len; -} -inline size_t document_stream::truncated_bytes() const noexcept { - if(error == CAPACITY) { return len - batch_start; } - return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; +simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { + // This function is not expected to be called in performance-sensitive settings. + // Let us guard against silly cases: + if((N < max_len) || (N == 0)) { return false; } + // Copy to the buffer. + std::memcpy(tmpbuf, json, max_len); + if(N > max_len) { // We pad whatever remains with ' '. + std::memset(tmpbuf + max_len, ' ', N - max_len); + } + return true; } -simdjson_inline document_stream::iterator::iterator() noexcept - : stream{nullptr}, finished{true} { -} +} // namespace ondemand +} // namespace lasx +} // namespace simdjson -simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept - : stream{_stream}, finished{is_end} { -} +namespace simdjson { -simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { - return simdjson_result(stream->doc, stream->error); -} +simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::json_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} -simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { - // If there is an error, then we want the iterator - // to be finished, no matter what. (E.g., we do not - // keep generating documents with errors, or go beyond - // a document with errors.) - // - // Users do not have to call "operator*()" when they use operator++, - // so we need to end the stream in the operator++ function. - // - // Note that setting finished = true is essential otherwise - // we would enter an infinite loop. - if (stream->error) { finished = true; } - // Note that stream->error() is guarded against error conditions - // (it will immediately return if stream->error casts to false). - // In effect, this next function does nothing when (stream->error) - // is true (hence the risk of an infinite loop). - stream->next(); - // If that was the last document, we're finished. - // It is the only type of error we do not want to appear - // in operator*. - if (stream->error == EMPTY) { finished = true; } - // If we had any other kind of error (not EMPTY) then we want - // to pass it along to the operator* and we cannot mark the result - // as "finished" just yet. - return *this; -} +} // namespace simdjson -simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { - return finished != other.finished; -} +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/json_iterator-inl.h for lasx */ +/* including simdjson/generic/ondemand/json_type-inl.h for lasx: #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* begin file simdjson/generic/ondemand/json_type-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H -simdjson_inline document_stream::iterator document_stream::begin() noexcept { - start(); - // If there are no documents, we're finished. - return iterator(this, error == EMPTY); +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace ondemand { + +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { + switch (type) { + case json_type::array: out << "array"; break; + case json_type::object: out << "object"; break; + case json_type::number: out << "number"; break; + case json_type::string: out << "string"; break; + case json_type::boolean: out << "boolean"; break; + case json_type::null: out << "null"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; } -simdjson_inline document_stream::iterator document_stream::end() noexcept { - return iterator(this, true); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { + return out << type.value(); } +#endif -inline void document_stream::start() noexcept { - if (error) { return; } - error = parser->allocate(batch_size); - if (error) { return; } - // Always run the first stage 1 parse immediately - batch_start = 0; - error = run_stage1(*parser, batch_start); - while(error == EMPTY) { - // In exceptional cases, we may start with an empty block - batch_start = next_batch_start(); - if (batch_start >= len) { return; } - error = run_stage1(*parser, batch_start); - } - if (error) { return; } - doc_index = batch_start; - doc = document(json_iterator(&buf[batch_start], parser)); - doc.iter._streaming = true; - #ifdef SIMDJSON_THREADS_ENABLED - if (use_thread && next_batch_start() < len) { - // Kick off the first thread on next batch if needed - error = stage1_thread_parser.allocate(batch_size); - if (error) { return; } - worker->start_thread(); - start_stage1_thread(); - if (error) { return; } - } - #endif // SIMDJSON_THREADS_ENABLED + +simdjson_inline number_type number::get_number_type() const noexcept { + return type; } -inline void document_stream::next() noexcept { - // We always enter at once once in an error condition. - if (error) { return; } - next_document(); - if (error) { return; } - auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); - doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; +simdjson_inline bool number::is_uint64() const noexcept { + return get_number_type() == number_type::unsigned_integer; +} - // Check if at end of structural indexes (i.e. at end of batch) - if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { - error = EMPTY; - // Load another batch (if available) - while (error == EMPTY) { - batch_start = next_batch_start(); - if (batch_start >= len) { break; } - #ifdef SIMDJSON_THREADS_ENABLED - if(use_thread) { - load_from_stage1_thread(); - } else { - error = run_stage1(*parser, batch_start); - } - #else - error = run_stage1(*parser, batch_start); - #endif - /** - * Whenever we move to another window, we need to update all pointers to make - * it appear as if the input buffer started at the beginning of the window. - * - * Take this input: - * - * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] - * - * Say you process the following window... - * - * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' - * - * When you do so, the json_iterator has a pointer at the beginning of the memory region - * (pointing at the beginning of '{"z"...'. - * - * When you move to the window that starts at... - * - * '[7, 10, 9] [15, 11, 12, 13] ... - * - * then it is not sufficient to just run stage 1. You also need to re-anchor the - * json_iterator so that it believes we are starting at '[7, 10, 9]...'. - * - * Under the DOM front-end, this gets done automatically because the parser owns - * the pointer the data, and when you call stage1 and then stage2 on the same - * parser, then stage2 will run on the pointer acquired by stage1. - * - * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that - * we used. But json_iterator has no callback when stage1 is called on the parser. - * In fact, I think that the parser is unaware of json_iterator. - * - * - * So we need to re-anchor the json_iterator after each call to stage 1 so that - * all of the pointers are in sync. - */ - doc.iter = json_iterator(&buf[batch_start], parser); - doc.iter._streaming = true; - /** - * End of resync. - */ +simdjson_inline uint64_t number::get_uint64() const noexcept { + return payload.unsigned_integer; +} - if (error) { continue; } // If the error was EMPTY, we may want to load another batch. - doc_index = batch_start; - } - } +simdjson_inline number::operator uint64_t() const noexcept { + return get_uint64(); } -inline void document_stream::next_document() noexcept { - // Go to next place where depth=0 (document depth) - error = doc.iter.skip_child(0); - if (error) { return; } - // Always set depth=1 at the start of document - doc.iter._depth = 1; - // consume comma if comma separated is allowed - if (allow_comma_separated) { doc.iter.consume_character(','); } - // Resets the string buffer at the beginning, thus invalidating the strings. - doc.iter._string_buf_loc = parser->string_buf.get(); - doc.iter._root = doc.iter.position(); +simdjson_inline bool number::is_int64() const noexcept { + return get_number_type() == number_type::signed_integer; } -inline size_t document_stream::next_batch_start() const noexcept { - return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; +simdjson_inline int64_t number::get_int64() const noexcept { + return payload.signed_integer; } -inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { - // This code only updates the structural index in the parser, it does not update any json_iterator - // instance. - size_t remaining = len - _batch_start; - if (remaining <= batch_size) { - return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); - } else { - return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); - } +simdjson_inline number::operator int64_t() const noexcept { + return get_int64(); } -simdjson_inline size_t document_stream::iterator::current_index() const noexcept { - return stream->doc_index; +simdjson_inline bool number::is_double() const noexcept { + return get_number_type() == number_type::floating_point_number; } -simdjson_inline std::string_view document_stream::iterator::source() const noexcept { - auto depth = stream->doc.iter.depth(); - auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); +simdjson_inline double number::get_double() const noexcept { + return payload.floating_point_number; +} - // If at root, process the first token to determine if scalar value - if (stream->doc.iter.at_root()) { - switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { - case '{': case '[': // Depth=1 already at start of document - break; - case '}': case ']': - depth--; - break; - default: // Scalar value document - // TODO: We could remove trailing whitespaces - // This returns a string spanning from start of value to the beginning of the next document (excluded) - { - auto next_index = stream->parser->implementation->structural_indexes[++cur_struct_index]; - // normally the length would be next_index - current_index() - 1, except for the last document - size_t svlen = next_index - current_index(); - const char *start = reinterpret_cast(stream->buf) + current_index(); - while(svlen > 1 && (std::isspace(start[svlen-1]) || start[svlen-1] == '\0')) { - svlen--; - } - return std::string_view(start, svlen); - } - } - cur_struct_index++; - } +simdjson_inline number::operator double() const noexcept { + return get_double(); +} - while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { - switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { - case '{': case '[': - depth++; - break; - case '}': case ']': - depth--; - break; - } - if (depth == 0) { break; } - cur_struct_index++; +simdjson_inline double number::as_double() const noexcept { + if(is_double()) { + return payload.floating_point_number; } - - return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; + if(is_int64()) { + return double(payload.signed_integer); + } + return double(payload.unsigned_integer); } -inline error_code document_stream::iterator::error() const noexcept { - return stream->error; +simdjson_inline void number::append_s64(int64_t value) noexcept { + payload.signed_integer = value; + type = number_type::signed_integer; } -#ifdef SIMDJSON_THREADS_ENABLED - -inline void document_stream::load_from_stage1_thread() noexcept { - worker->finish(); - // Swap to the parser that was loaded up in the thread. Make sure the parser has - // enough memory to swap to, as well. - std::swap(stage1_thread_parser,*parser); - error = stage1_thread_error; - if (error) { return; } - - // If there's anything left, start the stage 1 thread! - if (next_batch_start() < len) { - start_stage1_thread(); - } +simdjson_inline void number::append_u64(uint64_t value) noexcept { + payload.unsigned_integer = value; + type = number_type::unsigned_integer; } -inline void document_stream::start_stage1_thread() noexcept { - // we call the thread on a lambda that will update - // this->stage1_thread_error - // there is only one thread that may write to this value - // TODO this is NOT exception-safe. - this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error - size_t _next_batch_start = this->next_batch_start(); - - worker->run(this, & this->stage1_thread_parser, _next_batch_start); +simdjson_inline void number::append_double(double value) noexcept { + payload.floating_point_number = value; + type = number_type::floating_point_number; } -#endif // SIMDJSON_THREADS_ENABLED +simdjson_inline void number::skip_double() noexcept { + type = number_type::floating_point_number; +} } // namespace ondemand } // namespace lasx @@ -120121,2105 +148468,2643 @@ inline void document_stream::start_stage1_thread() noexcept { namespace simdjson { -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base(error) -{ -} -simdjson_inline simdjson_result::simdjson_result( - lasx::ondemand::document_stream &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ -} +simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::json_type &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} -} +} // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H -/* end file simdjson/generic/ondemand/document_stream-inl.h for lasx */ -/* including simdjson/generic/ondemand/field-inl.h for lasx: #include "simdjson/generic/ondemand/field-inl.h" */ -/* begin file simdjson/generic/ondemand/field-inl.h for lasx */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +/* end file simdjson/generic/ondemand/json_type-inl.h for lasx */ +/* including simdjson/generic/ondemand/logger-inl.h for lasx: #include "simdjson/generic/ondemand/logger-inl.h" */ +/* begin file simdjson/generic/ondemand/logger-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#include +#include + namespace simdjson { namespace lasx { namespace ondemand { +namespace logger { -// clang 6 does not think the default constructor can be noexcept, so we make it explicit -simdjson_inline field::field() noexcept : std::pair() {} +static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; +static constexpr const int LOG_EVENT_LEN = 20; +static constexpr const int LOG_BUFFER_LEN = 30; +static constexpr const int LOG_SMALL_BUFFER_LEN = 10; +static int log_depth = 0; // Not threadsafe. Log only. -simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept - : std::pair(key, std::forward(value)) -{ +// Helper to turn unprintable or newline characters into spaces +static inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } } -simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { - raw_json_string key; - SIMDJSON_TRY( parent_iter.field_key().get(key) ); - SIMDJSON_TRY( parent_iter.field_value() ); - return field::start(parent_iter, key); +template +static inline std::string string_format(const std::string& format, const Args&... args) +{ + SIMDJSON_PUSH_DISABLE_ALL_WARNINGS + int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; + auto size = static_cast(size_s); + if (size <= 0) return std::string(); + std::unique_ptr buf(new char[size]); + std::snprintf(buf.get(), size, format.c_str(), args...); + SIMDJSON_POP_DISABLE_WARNINGS + return std::string(buf.get(), buf.get() + size - 1); } -simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { - return field(key, parent_iter.child()); +static inline log_level get_log_level_from_env() +{ + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + char *lvl = getenv("SIMDJSON_LOG_LEVEL"); + SIMDJSON_POP_DISABLE_WARNINGS + if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } + return log_level::info; } -simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. - simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); - first.consume(); - return answer; +static inline log_level log_threshold() +{ + static log_level threshold = get_log_level_from_env(); + return threshold; } -template -simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { - std::string_view key; - SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); - receiver = key; - return SUCCESS; +static inline bool should_log(log_level level) +{ + return level >= log_threshold(); } -simdjson_inline raw_json_string field::key() const noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. - return first; +inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); } - -simdjson_inline std::string_view field::key_raw_json_token() const noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. - return std::string_view(reinterpret_cast(first.buf-1), second.iter._json_iter->token.peek(-1) - first.buf + 1); +inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "", type, detail, log_level::info); } - -simdjson_inline std::string_view field::escaped_key() const noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. - auto end_quote = second.iter._json_iter->token.peek(-1); - while(*end_quote != '"') end_quote--; - return std::string_view(reinterpret_cast(first.buf), end_quote - first.buf); +inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); } -simdjson_inline value &field::value() & noexcept { - return second; +inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "+", type, detail, log_level::info); + if (LOG_ENABLED) { log_depth++; } } - -simdjson_inline value field::value() && noexcept { - return std::forward(*this).second; +inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_line(iter, "+", type, "", delta, depth_delta, log_level::info); + if (LOG_ENABLED) { log_depth++; } } -} // namespace ondemand -} // namespace lasx -} // namespace simdjson +inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + if (LOG_ENABLED) { log_depth--; } + log_line(iter, "-", type, "", delta, depth_delta, log_level::info); +} -namespace simdjson { +inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); +} +inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { + log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); +} -simdjson_inline simdjson_result::simdjson_result( - lasx::ondemand::field &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ +inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_event(iter.json_iter(), type, detail, delta, depth_delta); } -simdjson_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base(error) -{ + +inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_value(iter.json_iter(), type, detail, delta, depth_delta); } -simdjson_inline simdjson_result simdjson_result::key() noexcept { - if (error()) { return error(); } - return first.key(); +inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_start_value(iter.json_iter(), type, delta, depth_delta); } -simdjson_inline simdjson_result simdjson_result::key_raw_json_token() noexcept { - if (error()) { return error(); } - return first.key_raw_json_token(); +inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_end_value(iter.json_iter(), type, delta, depth_delta); } -simdjson_inline simdjson_result simdjson_result::escaped_key() noexcept { - if (error()) { return error(); } - return first.escaped_key(); +inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_error(iter.json_iter(), error, detail, delta, depth_delta); } -simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.unescaped_key(allow_replacement); +inline void log_headers() noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(log_level::info))) { + // Technically a static variable is not thread-safe, but if you are using threads and logging... well... + static bool displayed_hint{false}; + log_depth = 0; + printf("\n"); + if (!displayed_hint) { + // We only print this helpful header once. + printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); + printf("# +array says 'this is where we were when we discovered the start array'\n"); + printf( + "# -array says 'this is where we were when we ended the array'\n"); + printf("# skip says 'this is a structural or value I am skipping'\n"); + printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); + printf("#\n"); + printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); + printf("# in addition to the depth being displayed.\n"); + printf("#\n"); + printf("# Every token in the document has a single depth determined by the tokens before it,\n"); + printf("# and is not affected by what the token actually is.\n"); + printf("#\n"); + printf("# Not all structural elements are presented as tokens in the logs.\n"); + printf("#\n"); + printf("# We never give control to the user within an empty array or an empty object.\n"); + printf("#\n"); + printf("# Inside an array, having a depth greater than the array's depth means that\n"); + printf("# we are pointing inside a value.\n"); + printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); + printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); + displayed_hint = true; + } + printf("\n"); + printf("| %-*s ", LOG_EVENT_LEN, "Event"); + printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); + printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); + // printf("| %-*s ", 5, "Next#"); + printf("| %-*s ", 5, "Depth"); + printf("| Detail "); + printf("|\n"); + + printf("|%.*s", LOG_EVENT_LEN + 2, DASHES); + printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); + printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); + // printf("|%.*s", 5+2, DASHES); + printf("|%.*s", 5 + 2, DASHES); + printf("|--------"); + printf("|\n"); + fflush(stdout); + } + } } -template -simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { - if (error()) { return error(); } - return first.unescaped_key(receiver, allow_replacement); +template +inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { + log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); } -simdjson_inline simdjson_result simdjson_result::value() noexcept { - if (error()) { return error(); } - return std::move(first.value()); +template +inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(level))) { + const int indent = depth * 2; + const auto buf = iter.token.buf; + auto msg = string_format(title, std::forward(args)...); + printf("| %*s%s%-*s ", indent, "", title_prefix, + LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); + { + // Print the current structural. + printf("| "); + // Before we begin, the index might point right before the document. + // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 + if (index < iter._root) { + printf("%*s", LOG_BUFFER_LEN, ""); + } else { + auto current_structural = &buf[*index]; + for (int i = 0; i < LOG_BUFFER_LEN; i++) { + printf("%c", printable_char(current_structural[i])); + } + } + printf(" "); + } + { + // Print the next structural. + printf("| "); + auto next_structural = &buf[*(index + 1)]; + for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { + printf("%c", printable_char(next_structural[i])); + } + printf(" "); + } + // printf("| %5u ", *(index+1)); + printf("| %5i ", depth); + printf("| %6.*s ", int(detail.size()), detail.data()); + printf("|\n"); + fflush(stdout); + } + } } +} // namespace logger +} // namespace ondemand +} // namespace lasx } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H -/* end file simdjson/generic/ondemand/field-inl.h for lasx */ -/* including simdjson/generic/ondemand/json_iterator-inl.h for lasx: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/json_iterator-inl.h for lasx */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +/* end file simdjson/generic/ondemand/logger-inl.h for lasx */ +/* including simdjson/generic/ondemand/object-inl.h for lasx: #include "simdjson/generic/ondemand/object-inl.h" */ +/* begin file simdjson/generic/ondemand/object-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/jsonpathutil.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_string_builder.h" // for constevalutil::fixed_string */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace lasx { namespace ondemand { -simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept - : token(std::forward(other.token)), - parser{other.parser}, - _string_buf_loc{other._string_buf_loc}, - error{other.error}, - _depth{other._depth}, - _root{other._root}, - _streaming{other._streaming} -{ - other.parser = nullptr; -} -simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { - token = other.token; - parser = other.parser; - _string_buf_loc = other._string_buf_loc; - error = other.error; - _depth = other._depth; - _root = other._root; - _streaming = other._streaming; - other.parser = nullptr; - return *this; -} - -simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept - : token(buf, &_parser->implementation->structural_indexes[0]), - parser{_parser}, - _string_buf_loc{parser->string_buf.get()}, - _depth{1}, - _root{parser->implementation->structural_indexes.get()}, - _streaming{false} - -{ - logger::log_headers(); -#if SIMDJSON_CHECK_EOF - assert_more_tokens(); -#endif +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); } - -#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser, bool streaming) noexcept - : token(buf, &_parser->implementation->structural_indexes[0]), - parser{_parser}, - _string_buf_loc{parser->string_buf.get()}, - _depth{1}, - _root{parser->implementation->structural_indexes.get()}, - _streaming{streaming} - -{ - logger::log_headers(); -#if SIMDJSON_CHECK_EOF - assert_more_tokens(); -#endif +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); } -#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON - -inline void json_iterator::rewind() noexcept { - token.set_position( root_position() ); - logger::log_headers(); // We start again - _string_buf_loc = parser->string_buf.get(); - _depth = 1; +simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { + return find_field_unordered(key); } - -inline bool json_iterator::balanced() const noexcept { - token_iterator ti(token); - int32_t count{0}; - ti.set_position( root_position() ); - while(ti.peek() <= peek_last()) { - switch (*ti.return_current_and_advance()) - { - case '[': case '{': - count++; - break; - case ']': case '}': - count--; - break; - default: - break; - } - } - return count == 0; +simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { + return std::forward(*this).find_field_unordered(key); } - - -// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller -// relating depth and parent_depth, which is a desired effect. The warning does not show up if the -// skip_child() function is not marked inline). -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { - if (depth() <= parent_depth) { return SUCCESS; } - switch (*return_current_and_advance()) { - // TODO consider whether matching braces is a requirement: if non-matching braces indicates - // *missing* braces, then future lookups are not in the object/arrays they think they are, - // violating the rule "validate enough structure that the user can be confident they are - // looking at the right values." - // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth - - // For the first open array/object in a value, we've already incremented depth, so keep it the same - // We never stop at colon, but if we did, it wouldn't affect depth - case '[': case '{': case ':': - logger::log_start_value(*this, "skip"); - break; - // If there is a comma, we have just finished a value in an array/object, and need to get back in - case ',': - logger::log_value(*this, "skip"); - break; - // ] or } means we just finished a value and need to jump out of the array/object - case ']': case '}': - logger::log_end_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } -#if SIMDJSON_CHECK_EOF - // If there are no more tokens, the parent is incomplete. - if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } -#endif // SIMDJSON_CHECK_EOF - break; - case '"': - if(*peek() == ':') { - // We are at a key!!! - // This might happen if you just started an object and you skip it immediately. - // Performance note: it would be nice to get rid of this check as it is somewhat - // expensive. - // https://github.com/simdjson/simdjson/issues/1742 - logger::log_value(*this, "key"); - return_current_and_advance(); // eat up the ':' - break; // important!!! - } - simdjson_fallthrough; - // Anything else must be a scalar value - default: - // For the first scalar, we will have incremented depth already, so we decrement it here. - logger::log_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } - break; +simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; } - - // Now that we've considered the first value, we only increment/decrement for arrays/objects - while (position() < end_position()) { - switch (*return_current_and_advance()) { - case '[': case '{': - logger::log_start_value(*this, "skip"); - _depth++; - break; - // TODO consider whether matching braces is a requirement: if non-matching braces indicates - // *missing* braces, then future lookups are not in the object/arrays they think they are, - // violating the rule "validate enough structure that the user can be confident they are - // looking at the right values." - // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth - case ']': case '}': - logger::log_end_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } - break; - default: - logger::log_value(*this, "skip", ""); - break; - } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; } - - return report_error(TAPE_ERROR, "not enough close braces"); + return value(iter.child()); } -SIMDJSON_POP_DISABLE_WARNINGS - -simdjson_inline bool json_iterator::at_root() const noexcept { - return position() == root_position(); +simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_object().error() ); + return object(iter); } - -simdjson_inline bool json_iterator::is_single_token() const noexcept { - return parser->implementation->n_structural_indexes == 1; +simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_root_object().error() ); + return object(iter); } - -simdjson_inline bool json_iterator::streaming() const noexcept { - return _streaming; +simdjson_warn_unused simdjson_inline error_code object::consume() noexcept { + if(iter.is_at_key()) { + /** + * whenever you are pointing at a key, calling skip_child() is + * unsafe because you will hit a string and you will assume that + * it is string value, and this mistake will lead you to make bad + * depth computation. + */ + /** + * We want to 'consume' the key. We could really + * just do _json_iter->return_current_and_advance(); at this + * point, but, for clarity, we will use the high-level API to + * eat the key. We assume that the compiler optimizes away + * most of the work. + */ + simdjson_unused raw_json_string actual_key; + auto error = iter.field_key().get(actual_key); + if (error) { iter.abandon(); return error; }; + // Let us move to the value while we are at it. + if ((error = iter.field_value())) { iter.abandon(); return error; } + } + auto error_skip = iter.json_iter().skip_child(iter.depth()-1); + if(error_skip) { iter.abandon(); } + return error_skip; } -simdjson_inline token_position json_iterator::root_position() const noexcept { - return _root; +simdjson_inline simdjson_result object::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + const uint8_t * final_point{iter._json_iter->peek()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } -simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { - SIMDJSON_ASSUME( _depth == 1 ); +simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.started_object().error() ); + return object(iter); } -simdjson_inline void json_iterator::assert_at_root() const noexcept { - SIMDJSON_ASSUME( _depth == 1 ); -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument - // has side effects that will be discarded. - SIMDJSON_ASSUME( token.position() == _root ); -#endif +simdjson_inline object object::resume(const value_iterator &iter) noexcept { + return iter; } -simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { - assert_valid_position(token._position + required_tokens - 1); +simdjson_inline object::object(const value_iterator &_iter) noexcept + : iter{_iter} +{ } -simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); - SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +simdjson_inline simdjson_result object::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } #endif + return object_iterator(iter); } - -simdjson_inline bool json_iterator::at_end() const noexcept { - return position() == end_position(); +simdjson_inline simdjson_result object::end() noexcept { + return object_iterator(iter); } -simdjson_inline token_position json_iterator::end_position() const noexcept { - uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; - return &parser->implementation->structural_indexes[n_structural_indexes]; + +inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result child; + + // If there is an escape character in the key, unescape it and then get the child. + size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = find_field(unescaped); // Take note find_field does not unescape keys when matching + } else { + child = find_field(key); + } + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; } -inline std::string json_iterator::to_string() const noexcept { - if( !is_alive() ) { return "dead json_iterator instance"; } - const char * current_structural = reinterpret_cast(token.peek()); - return std::string("json_iterator [ depth : ") + std::to_string(_depth) - + std::string(", structural : '") + std::string(current_structural,1) - + std::string("', offset : ") + std::to_string(token.current_offset()) - + std::string("', error : ") + error_message(error) - + std::string(" ]"); +inline simdjson_result object::at_path(std::string_view json_path) noexcept { + auto json_pointer = json_path_to_pointer_conversion(json_path); + if (json_pointer == "-1") { + return INVALID_JSON_POINTER; + } + return at_pointer(json_pointer); } -inline simdjson_result json_iterator::current_location() const noexcept { - if (!is_alive()) { // Unrecoverable error - if (!at_root()) { - return reinterpret_cast(token.peek(-1)); +inline simdjson_result> object::at_path_with_wildcard(std::string_view json_path) noexcept { + std::vector result; + + auto result_pair = get_next_key_and_json_path(json_path); + std::string_view key = result_pair.first; + std::string_view remaining_path = result_pair.second; + // Handle when its the case for wildcard + if (key == "*") { + // Loop through each field in the object + for (auto field : *this) { + value val; + SIMDJSON_TRY(field.value().get(val)); + + if (remaining_path.empty()) { + result.push_back(std::move(val)); + } else { + auto nested_result = val.at_path_with_wildcard(remaining_path); + + if (nested_result.error()) { + return nested_result.error(); + } + // Extract and append all nested matches to our result + std::vector nested_vec; + SIMDJSON_TRY(std::move(nested_result).get(nested_vec)); + + result.insert(result.end(), + std::make_move_iterator(nested_vec.begin()), + std::make_move_iterator(nested_vec.end())); + } + } + return result; + } else { + value val; + SIMDJSON_TRY(find_field(key).get(val)); + + if (remaining_path.empty()) { + result.push_back(std::move(val)); + return result; } else { - return reinterpret_cast(token.peek()); + return val.at_path_with_wildcard(remaining_path); } } - if (at_end()) { - return OUT_OF_BOUNDS; - } - return reinterpret_cast(token.peek()); } -simdjson_inline bool json_iterator::is_alive() const noexcept { - return parser; +simdjson_inline simdjson_result object::count_fields() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the object after counting the number of elements. + iter.reset_object(); + return count; } -simdjson_inline void json_iterator::abandon() noexcept { - parser = nullptr; - _depth = 0; +simdjson_inline simdjson_result object::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_object().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; } -simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { -#if SIMDJSON_CHECK_EOF - assert_more_tokens(); -#endif // SIMDJSON_CHECK_EOF - return token.return_current_and_advance(); +simdjson_inline simdjson_result object::reset() & noexcept { + return iter.reset_object(); } -simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { - // deliberately done without safety guard: - return token.peek(); -} +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION -simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_more_tokens(delta+1); -#endif // SIMDJSON_CHECK_EOF - return token.peek(delta); -} +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_inline error_code object::extract_into(T& out) & noexcept { + // Helper to check if a field name matches any of the requested fields + auto should_extract = [](std::string_view field_name) constexpr -> bool { + return ((FieldNames.view() == field_name) || ...); + }; -simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_more_tokens(delta+1); -#endif // #if SIMDJSON_CHECK_EOF - return token.peek_length(delta); -} + // Iterate through all members of T using reflection + template for (constexpr auto mem : std::define_static_array( + std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + + // Only extract this field if it's in our list of requested fields + if constexpr (should_extract(key)) { + // Try to find and extract the field + if constexpr (concepts::optional_type) { + // For optional fields, it's ok if they're missing + auto field_result = find_field_unordered(key); + if (!field_result.error()) { + auto error = field_result.get(out.[:mem:]); + if (error && error != NO_SUCH_FIELD) { + return error; + } + } else if (field_result.error() != NO_SUCH_FIELD) { + return field_result.error(); + } else { + out.[:mem:].reset(); + } + } else { + // For required fields (in the requested list), fail if missing + SIMDJSON_TRY((*this)[key].get(out.[:mem:])); + } + } + } + }; -simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { - // todo: currently we require end-of-string buffering, but the following - // assert_valid_position should be turned on if/when we lift that condition. - // assert_valid_position(position); - // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF - // is ON by default, we have no choice but to disable it for real with a comment. - return token.peek(position); + return SUCCESS; } -simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_valid_position(position); -#endif // SIMDJSON_CHECK_EOF - return token.peek_length(position); +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + +} // namespace ondemand +} // namespace lasx +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); } -simdjson_inline uint32_t json_iterator::peek_root_length(token_position position) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_valid_position(position); -#endif // SIMDJSON_CHECK_EOF - return token.peek_root_length(position); +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); } - -simdjson_inline token_position json_iterator::last_position() const noexcept { - // The following line fails under some compilers... - // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); - // since it has side-effects. - uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; - SIMDJSON_ASSUME(n_structural_indexes > 0); - return &parser->implementation->structural_indexes[n_structural_indexes - 1]; +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); } -simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { - return token.peek(last_position()); +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first)[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field(key); } -simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { - SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); - SIMDJSON_ASSUME(_depth == parent_depth + 1); - _depth = parent_depth; +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); } -simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { - SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); - SIMDJSON_ASSUME(_depth == child_depth - 1); - _depth = child_depth; +simdjson_inline simdjson_result simdjson_result::at_path( + std::string_view json_path) noexcept { + if (error()) { + return error(); + } + return first.at_path(json_path); } -simdjson_inline depth_t json_iterator::depth() const noexcept { - return _depth; +simdjson_inline simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) noexcept { + if (error()) { return error(); } + return first.at_path_with_wildcard(json_path); } -simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { - return _string_buf_loc; +inline simdjson_result simdjson_result::reset() noexcept { + if (error()) { return error(); } + return first.reset(); } -simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { - SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); - logger::log_error(*this, message); - error = _error; - return error; +inline simdjson_result simdjson_result::is_empty() noexcept { + if (error()) { return error(); } + return first.is_empty(); } -simdjson_inline token_position json_iterator::position() const noexcept { - return token.position(); +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); } -simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { -#if SIMDJSON_DEVELOPMENT_CHECKS - auto result = parser->unescape(in, _string_buf_loc, allow_replacement); - SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); - return result; -#else - return parser->unescape(in, _string_buf_loc, allow_replacement); -#endif +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); } -simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { -#if SIMDJSON_DEVELOPMENT_CHECKS - auto result = parser->unescape_wobbly(in, _string_buf_loc); - SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +/* end file simdjson/generic/ondemand/object-inl.h for lasx */ +/* including simdjson/generic/ondemand/object_iterator-inl.h for lasx: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/object_iterator-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace ondemand { + +// +// object_iterator +// + +simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result object_iterator::operator*() noexcept { + error_code error = iter.error(); + if (error) { iter.abandon(); return error; } + auto result = field::start(iter); + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (result.error()) { iter.abandon(); } return result; -#else - return parser->unescape_wobbly(in, _string_buf_loc); -#endif } - -simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { - SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); - SIMDJSON_ASSUME(_depth == child_depth - 1); -#if SIMDJSON_DEVELOPMENT_CHECKS -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); - SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); -#endif -#endif - token.set_position(position); - _depth = child_depth; +simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { + return !(*this != other); } - -simdjson_inline error_code json_iterator::consume_character(char c) noexcept { - if (*peek() == c) { - return_current_and_advance(); - return SUCCESS; - } - return TAPE_ERROR; +simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { + return iter.is_open(); } -#if SIMDJSON_DEVELOPMENT_CHECKS +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline object_iterator &object_iterator::operator++() noexcept { + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error -simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { - SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); - return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; -} + simdjson_unused error_code error; + if ((error = iter.skip_child() )) { return *this; } -simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { - SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); - if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } + simdjson_unused bool has_value; + if ((error = iter.has_next_field().get(has_value) )) { return *this; }; + return *this; } +SIMDJSON_POP_DISABLE_WARNINGS -#endif +// +// ### Live States +// +// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the object is first found and the iterator is just past the {. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the , or } before the next value. In this state, +// depth == iter.depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter.depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the object iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an +// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter.depth == depth, and at_start == false. +// +// Errors that occur while reading a field to give to the user (such as when the key is not a +// string or the field is missing a colon) are yielded immediately. Depth is then decremented, +// moving to the Finished state without transitioning through an Error state at all. +// +// ## Terminal State +// +// The terminal state has iter.depth < depth. at_start is always false. +// +// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. +// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. +// +} // namespace ondemand +} // namespace lasx +} // namespace simdjson -simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { - SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); - logger::log_error(*this, message); - return _error; +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + lasx::ondemand::object_iterator &&value +) noexcept + : implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; } - - -simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { - // This function is not expected to be called in performance-sensitive settings. - // Let us guard against silly cases: - if((N < max_len) || (N == 0)) { return false; } - // Copy to the buffer. - std::memcpy(tmpbuf, json, max_len); - if(N > max_len) { // We pad whatever remains with ' '. - std::memset(tmpbuf + max_len, ' ', N - max_len); - } - return true; +// Checks for ']' and ',' +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++first; + return *this; } -} // namespace ondemand -} // namespace lasx -} // namespace simdjson - -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::json_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/json_iterator-inl.h for lasx */ -/* including simdjson/generic/ondemand/json_type-inl.h for lasx: #include "simdjson/generic/ondemand/json_type-inl.h" */ -/* begin file simdjson/generic/ondemand/json_type-inl.h for lasx */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/object_iterator-inl.h for lasx */ +/* including simdjson/generic/ondemand/parser-inl.h for lasx: #include "simdjson/generic/ondemand/parser-inl.h" */ +/* begin file simdjson/generic/ondemand/parser-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace lasx { namespace ondemand { -inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { - switch (type) { - case json_type::array: out << "array"; break; - case json_type::object: out << "object"; break; - case json_type::number: out << "number"; break; - case json_type::string: out << "string"; break; - case json_type::boolean: out << "boolean"; break; - case json_type::null: out << "null"; break; - default: SIMDJSON_UNREACHABLE(); - } - return out; +simdjson_inline parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity} { } -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { - return out << type.value(); +simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { + if (new_capacity > max_capacity()) { return CAPACITY; } + if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + + // string_capacity copied from document::allocate + _capacity = 0; + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); +#if SIMDJSON_DEVELOPMENT_CHECKS + start_positions.reset(new (std::nothrow) token_position[new_max_depth]); +#endif + if (implementation) { + SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); + SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + } else { + SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); + } + _capacity = new_capacity; + _max_depth = new_max_depth; + return SUCCESS; +} +#if SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { + return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); } #endif +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } + json.remove_utf8_bom(); -simdjson_inline number_type number::get_number_type() const noexcept { - return type; + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return document::start({ reinterpret_cast(json.data()), this }); } -simdjson_inline bool number::is_uint64() const noexcept { - return get_number_type() == number_type::unsigned_integer; +#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); + if (err) { + if (err != UNCLOSED_STRING) + return err; + } + return document::start({ reinterpret_cast(json.data()), this, true }); } +#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -simdjson_inline uint64_t number::get_uint64() const noexcept { - return payload.unsigned_integer; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); } -simdjson_inline number::operator uint64_t() const noexcept { - return get_uint64(); +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); } -simdjson_inline bool number::is_int64() const noexcept { - return get_number_type() == number_type::signed_integer; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { + return iterate(padded_string_view(json, allocated)); } -simdjson_inline int64_t number::get_int64() const noexcept { - return payload.signed_integer; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { + return iterate(pad_with_reserve(json)); } -simdjson_inline number::operator int64_t() const noexcept { - return get_int64(); +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { + return iterate(padded_string_view(json)); } -simdjson_inline bool number::is_double() const noexcept { - return get_number_type() == number_type::floating_point_number; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + padded_string_view json = result.value_unsafe(); + return iterate(json); } -simdjson_inline double number::get_double() const noexcept { - return payload.floating_point_number; +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + const padded_string &json = result.value_unsafe(); + return iterate(json); } -simdjson_inline number::operator double() const noexcept { - return get_double(); +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } + + json.remove_utf8_bom(); + + // Allocate if needed + if (capacity() < json.length()) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return json_iterator(reinterpret_cast(json.data()), this); } -simdjson_inline double number::as_double() const noexcept { - if(is_double()) { - return payload.floating_point_number; +inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { + buf += 3; + len -= 3; } - if(is_int64()) { - return double(payload.signed_integer); + if(allow_comma_separated && batch_size < len) { batch_size = len; } + return document_stream(*this, buf, len, batch_size, allow_comma_separated); +} + +inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. + return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(padded_string_view s, size_t batch_size, bool allow_comma_separated) noexcept { + if (!s.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(pad(s), batch_size, allow_comma_separated); +} +simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { + return _capacity; +} +simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; +} +simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { + return _max_depth; +} + +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; } - return double(payload.unsigned_integer); } -simdjson_inline void number::append_s64(int64_t value) noexcept { - payload.signed_integer = value; - type = number_type::signed_integer; +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { + uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; } -simdjson_inline void number::append_u64(uint64_t value) noexcept { - payload.unsigned_integer = value; - type = number_type::unsigned_integer; +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { + uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; } -simdjson_inline void number::append_double(double value) noexcept { - payload.floating_point_number = value; - type = number_type::floating_point_number; +simdjson_inline simdjson_warn_unused ondemand::parser& parser::get_parser() { + return *parser::get_parser_instance(); } -simdjson_inline void number::skip_double() noexcept { - type = number_type::floating_point_number; +simdjson_inline bool release_parser() { + auto &parser_instance = parser::get_threadlocal_parser_if_exists(); + if (parser_instance) { + parser_instance.reset(); + return true; + } + return false; +} + +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_parser_instance() { + std::unique_ptr& parser_instance = get_threadlocal_parser_if_exists(); + if (!parser_instance) { + parser_instance.reset(new ondemand::parser()); + } + return parser_instance; } +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_threadlocal_parser_if_exists() { + // @the-moisrex points out that this could be implemented with std::optional (C++17). + thread_local std::unique_ptr parser_instance = nullptr; + return parser_instance; +} + + } // namespace ondemand } // namespace lasx } // namespace simdjson namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::json_type &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H -/* end file simdjson/generic/ondemand/json_type-inl.h for lasx */ -/* including simdjson/generic/ondemand/logger-inl.h for lasx: #include "simdjson/generic/ondemand/logger-inl.h" */ -/* begin file simdjson/generic/ondemand/logger-inl.h for lasx */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H +/* end file simdjson/generic/ondemand/parser-inl.h for lasx */ +/* including simdjson/generic/ondemand/raw_json_string-inl.h for lasx: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ -#include -#include - namespace simdjson { + namespace lasx { namespace ondemand { -namespace logger { -static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; -static constexpr const int LOG_EVENT_LEN = 20; -static constexpr const int LOG_BUFFER_LEN = 30; -static constexpr const int LOG_SMALL_BUFFER_LEN = 10; -static int log_depth = 0; // Not threadsafe. Log only. +simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} -// Helper to turn unprintable or newline characters into spaces -static inline char printable_char(char c) { - if (c >= 0x20) { - return c; - } else { - return ' '; - } +simdjson_inline const char * raw_json_string::raw() const noexcept { + return reinterpret_cast(buf); } -template -static inline std::string string_format(const std::string& format, const Args&... args) -{ - SIMDJSON_PUSH_DISABLE_ALL_WARNINGS - int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; - auto size = static_cast(size_s); - if (size <= 0) return std::string(); - std::unique_ptr buf(new char[size]); - std::snprintf(buf.get(), size, format.c_str(), args...); - SIMDJSON_POP_DISABLE_WARNINGS - return std::string(buf.get(), buf.get() + size - 1); +simdjson_inline char raw_json_string::operator[](size_t i) const noexcept { + return reinterpret_cast(buf)[i]; } -static inline log_level get_log_level_from_env() -{ - SIMDJSON_PUSH_DISABLE_WARNINGS - SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe - char *lvl = getenv("SIMDJSON_LOG_LEVEL"); - SIMDJSON_POP_DISABLE_WARNINGS - if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } - return log_level::info; +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { + size_t pos{0}; + while(pos < target.size()) { + pos = target.find('"', pos); + if(pos == std::string_view::npos) { return true; } + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } + } + pos++; + } + return true; } -static inline log_level log_threshold() -{ - static log_level threshold = get_log_level_from_env(); - return threshold; +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { + size_t pos{0}; + while(target[pos]) { + const char * result = strchr(target+pos, '"'); + if(result == nullptr) { return true; } + pos = result - target; + if(pos != 0 && target[pos-1] != '\\') { return false; } + if(pos > 1 && target[pos-2] == '\\') { + size_t backslash_count{2}; + for(size_t i = 3; i <= pos; i++) { + if(target[pos-i] == '\\') { backslash_count++; } + else { break; } + } + if(backslash_count % 2 == 0) { return false; } + } + pos++; + } + return true; } -static inline bool should_log(log_level level) -{ - return level >= log_threshold(); -} -inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { + // If we are going to call memcmp, then we must know something about the length of the raw_json_string. + return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); } -inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { - log_line(iter, index, depth, "", type, detail, log_level::info); -} -inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { + // Assumptions: does not contain unescaped quote characters("), and + // the raw content is quote terminated within a valid JSON string. + if(target.size() <= SIMDJSON_PADDING) { + return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + } + const char * r{raw()}; + size_t pos{0}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; } -inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { - log_line(iter, index, depth, "+", type, detail, log_level::info); - if (LOG_ENABLED) { log_depth++; } -} -inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_line(iter, "+", type, "", delta, depth_delta, log_level::info); - if (LOG_ENABLED) { log_depth++; } +simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; } -inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - if (LOG_ENABLED) { log_depth--; } - log_line(iter, "-", type, "", delta, depth_delta, log_level::info); -} -inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { - log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); -} -inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { - log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); +simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { + // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; } -inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_event(iter.json_iter(), type, detail, delta, depth_delta); +simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; } -inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_value(iter.json_iter(), type, detail, delta, depth_delta); +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { + return a.unsafe_is_equal(c); } -inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_start_value(iter.json_iter(), type, delta, depth_delta); +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { + return a == c; } -inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_end_value(iter.json_iter(), type, delta, depth_delta); +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { + return !(a == c); } -inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { - log_error(iter.json_iter(), error, detail, delta, depth_delta); +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { + return !(a == c); } -inline void log_headers() noexcept { - if (LOG_ENABLED) { - if (simdjson_unlikely(should_log(log_level::info))) { - // Technically a static variable is not thread-safe, but if you are using threads and logging... well... - static bool displayed_hint{false}; - log_depth = 0; - printf("\n"); - if (!displayed_hint) { - // We only print this helpful header once. - printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); - printf("# +array says 'this is where we were when we discovered the start array'\n"); - printf( - "# -array says 'this is where we were when we ended the array'\n"); - printf("# skip says 'this is a structural or value I am skipping'\n"); - printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); - printf("#\n"); - printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); - printf("# in addition to the depth being displayed.\n"); - printf("#\n"); - printf("# Every token in the document has a single depth determined by the tokens before it,\n"); - printf("# and is not affected by what the token actually is.\n"); - printf("#\n"); - printf("# Not all structural elements are presented as tokens in the logs.\n"); - printf("#\n"); - printf("# We never give control to the user within an empty array or an empty object.\n"); - printf("#\n"); - printf("# Inside an array, having a depth greater than the array's depth means that\n"); - printf("# we are pointing inside a value.\n"); - printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); - printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); - displayed_hint = true; - } - printf("\n"); - printf("| %-*s ", LOG_EVENT_LEN, "Event"); - printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); - printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); - // printf("| %-*s ", 5, "Next#"); - printf("| %-*s ", 5, "Depth"); - printf("| Detail "); - printf("|\n"); - printf("|%.*s", LOG_EVENT_LEN + 2, DASHES); - printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); - printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); - // printf("|%.*s", 5+2, DASHES); - printf("|%.*s", 5 + 2, DASHES); - printf("|--------"); - printf("|\n"); - fflush(stdout); - } - } +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { + return iter.unescape(*this, allow_replacement); } -template -inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { - log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { + return iter.unescape_wobbly(*this); } -template -inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... args) noexcept { - if (LOG_ENABLED) { - if (simdjson_unlikely(should_log(level))) { - const int indent = depth * 2; - const auto buf = iter.token.buf; - auto msg = string_format(title, std::forward(args)...); - printf("| %*s%s%-*s ", indent, "", title_prefix, - LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); - { - // Print the current structural. - printf("| "); - // Before we begin, the index might point right before the document. - // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 - if (index < iter._root) { - printf("%*s", LOG_BUFFER_LEN, ""); - } else { - auto current_structural = &buf[*index]; - for (int i = 0; i < LOG_BUFFER_LEN; i++) { - printf("%c", printable_char(current_structural[i])); - } - } - printf(" "); - } - { - // Print the next structural. - printf("| "); - auto next_structural = &buf[*(index + 1)]; - for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { - printf("%c", printable_char(next_structural[i])); - } - printf(" "); - } - // printf("| %5u ", *(index+1)); - printf("| %5i ", depth); - printf("| %6.*s ", int(detail.size()), detail.data()); - printf("|\n"); - fflush(stdout); +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { + bool in_escape = false; + const char *s = str.raw(); + while (true) { + switch (*s) { + case '\\': in_escape = !in_escape; break; + case '"': if (in_escape) { in_escape = false; } else { return out; } break; + default: if (in_escape) { in_escape = false; } } + out << *s; + s++; } } -} // namespace logger } // namespace ondemand } // namespace lasx } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H -/* end file simdjson/generic/ondemand/logger-inl.h for lasx */ -/* including simdjson/generic/ondemand/object-inl.h for lasx: #include "simdjson/generic/ondemand/object-inl.h" */ -/* begin file simdjson/generic/ondemand/object-inl.h for lasx */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::raw() const noexcept { + if (error()) { return error(); } + return first.raw(); +} +simdjson_inline char simdjson_result::operator[](size_t i) const noexcept { + if (error()) { return error(); } + return first[i]; +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(lasx::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { + if (error()) { return error(); } + return first.unescape(iter, allow_replacement); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(lasx::ondemand::json_iterator &iter) const noexcept { + if (error()) { return error(); } + return first.unescape_wobbly(iter); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H +/* end file simdjson/generic/ondemand/raw_json_string-inl.h for lasx */ +/* including simdjson/generic/ondemand/serialization-inl.h for lasx: #include "simdjson/generic/ondemand/serialization-inl.h" */ +/* begin file simdjson/generic/ondemand/serialization-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_builder.h" */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { -namespace lasx { -namespace ondemand { -simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; - } - return value(iter.child()); -} -simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; - } - return value(iter.child()); -} -simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { - return find_field_unordered(key); +inline std::string_view trim(const std::string_view str) noexcept { + // We can almost surely do better by rolling our own find_first_not_of function. + size_t first = str.find_first_not_of(" \t\n\r"); + // If we have the empty string (just white space), then no trimming is possible, and + // we return the empty string_view. + if (std::string_view::npos == first) { return std::string_view(); } + size_t last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, (last - first + 1)); } -simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { - return std::forward(*this).find_field_unordered(key); + + +inline simdjson_result to_json_string(lasx::ondemand::document& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; - } - return value(iter.child()); + +inline simdjson_result to_json_string(lasx::ondemand::document_reference& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); - if (!has_value) { - logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); - return NO_SUCH_FIELD; + +inline simdjson_result to_json_string(lasx::ondemand::value& x) noexcept { + /** + * If we somehow receive a value that has already been consumed, + * then the following code could be in trouble. E.g., we create + * an array as needed, but if an array was already created, then + * it could be bad. + */ + using namespace lasx::ondemand; + lasx::ondemand::json_type t; + auto error = x.type().get(t); + if(error != SUCCESS) { return error; } + switch (t) + { + case json_type::array: + { + lasx::ondemand::array array; + error = x.get_array().get(array); + if(error) { return error; } + return to_json_string(array); + } + case json_type::object: + { + lasx::ondemand::object object; + error = x.get_object().get(object); + if(error) { return error; } + return to_json_string(object); + } + default: + return trim(x.raw_json_token()); } - return value(iter.child()); } -simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { - SIMDJSON_TRY( iter.start_object().error() ); - return object(iter); +inline simdjson_result to_json_string(lasx::ondemand::object& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { - SIMDJSON_TRY( iter.start_root_object().error() ); - return object(iter); + +inline simdjson_result to_json_string(lasx::ondemand::array& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); } -simdjson_inline error_code object::consume() noexcept { - if(iter.is_at_key()) { - /** - * whenever you are pointing at a key, calling skip_child() is - * unsafe because you will hit a string and you will assume that - * it is string value, and this mistake will lead you to make bad - * depth computation. - */ - /** - * We want to 'consume' the key. We could really - * just do _json_iter->return_current_and_advance(); at this - * point, but, for clarity, we will use the high-level API to - * eat the key. We assume that the compiler optimizes away - * most of the work. - */ - simdjson_unused raw_json_string actual_key; - auto error = iter.field_key().get(actual_key); - if (error) { iter.abandon(); return error; }; - // Let us move to the value while we are at it. - if ((error = iter.field_value())) { iter.abandon(); return error; } - } - auto error_skip = iter.json_iter().skip_child(iter.depth()-1); - if(error_skip) { iter.abandon(); } - return error_skip; + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline simdjson_result object::raw_json() noexcept { - const uint8_t * starting_point{iter.peek_start()}; - auto error = consume(); - if(error) { return error; } - const uint8_t * final_point{iter._json_iter->peek()}; - return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { - SIMDJSON_TRY( iter.started_object().error() ); - return object(iter); +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline object object::resume(const value_iterator &iter) noexcept { - return iter; +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } -simdjson_inline object::object(const value_iterator &_iter) noexcept - : iter{_iter} -{ +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); } +} // namespace simdjson -simdjson_inline simdjson_result object::begin() noexcept { -#if SIMDJSON_DEVELOPMENT_CHECKS - if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } -#endif - return object_iterator(iter); +namespace simdjson { namespace lasx { namespace ondemand { + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } } -simdjson_inline simdjson_result object::end() noexcept { - return object_iterator(iter); +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } } +#endif -inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { - if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } - json_pointer = json_pointer.substr(1); - size_t slash = json_pointer.find('/'); - std::string_view key = json_pointer.substr(0, slash); - // Grab the child with the given key - simdjson_result child; +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif - // If there is an escape character in the key, unescape it and then get the child. - size_t escape = key.find('~'); - if (escape != std::string_view::npos) { - // Unescape the key - std::string unescaped(key); - do { - switch (unescaped[escape+1]) { - case '0': - unescaped.replace(escape, 2, "~"); - break; - case '1': - unescaped.replace(escape, 2, "/"); - break; - default: - return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); - } - escape = unescaped.find('~', escape+1); - } while (escape != std::string::npos); - child = find_field(unescaped); // Take note find_field does not unescape keys when matching +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); } else { - child = find_field(key); + throw simdjson::simdjson_error(error); } - if(child.error()) { - return child; // we do not continue if there was an error +} +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::document_reference& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); } - // If there is a /, we have to recurse and look up more of the path - if (slash != std::string_view::npos) { - child = child.at_pointer(json_pointer.substr(slash)); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); } - return child; } +#endif -inline simdjson_result object::at_path(std::string_view json_path) noexcept { - auto json_pointer = json_path_to_pointer_conversion(json_path); - if (json_pointer == "-1") { - return INVALID_JSON_POINTER; +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); } - return at_pointer(json_pointer); } - -simdjson_inline simdjson_result object::count_fields() & noexcept { - size_t count{0}; - // Important: we do not consume any of the values. - for(simdjson_unused auto v : *this) { count++; } - // The above loop will always succeed, but we want to report errors. - if(iter.error()) { return iter.error(); } - // We need to move back at the start because we expect users to iterate through - // the object after counting the number of elements. - iter.reset_object(); - return count; +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } } +#endif +}}} // namespace simdjson::lasx::ondemand -simdjson_inline simdjson_result object::is_empty() & noexcept { - bool is_not_empty; - auto error = iter.reset_object().get(is_not_empty); - if(error) { return error; } - return !is_not_empty; +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +/* end file simdjson/generic/ondemand/serialization-inl.h for lasx */ +/* including simdjson/generic/ondemand/token_iterator-inl.h for lasx: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/token_iterator-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace ondemand { + +simdjson_inline token_iterator::token_iterator( + const uint8_t *_buf, + token_position position +) noexcept : buf{_buf}, _position{position} +{ } -simdjson_inline simdjson_result object::reset() & noexcept { - return iter.reset_object(); +simdjson_inline uint32_t token_iterator::current_offset() const noexcept { + return *(_position); } -} // namespace ondemand -} // namespace lasx -} // namespace simdjson -namespace simdjson { +simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { + return &buf[*(_position++)]; +} -simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::object &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { + return &buf[*position]; +} +simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { + return *position; +} +simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { + return *(position+1) - *position; +} -simdjson_inline simdjson_result simdjson_result::begin() noexcept { - if (error()) { return error(); } - return first.begin(); +simdjson_inline uint32_t token_iterator::peek_root_length(token_position position) const noexcept { + return *(position+2) - *(position) > *(position+1) - *(position) ? + *(position+1) - *(position) + : *(position+2) - *(position); } -simdjson_inline simdjson_result simdjson_result::end() noexcept { - if (error()) { return error(); } - return first.end(); +simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { + return &buf[*(_position+delta)]; } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); +simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { + return *(_position+delta); } -simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first).find_field_unordered(key); +simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { + return *(_position+delta+1) - *(_position+delta); } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { - if (error()) { return error(); } - return first[key]; + +simdjson_inline token_position token_iterator::position() const noexcept { + return _position; } -simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first)[key]; +simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { + _position = target_position; +} + +simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { + return _position == other._position; +} +simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { + return _position != other._position; } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); +simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { + return _position > other._position; } -simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first).find_field(key); +simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { + return _position >= other._position; } - -simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); +simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { + return _position < other._position; } - -simdjson_inline simdjson_result simdjson_result::at_path( - std::string_view json_path) noexcept { - if (error()) { - return error(); - } - return first.at_path(json_path); +simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { + return _position <= other._position; } -inline simdjson_result simdjson_result::reset() noexcept { - if (error()) { return error(); } - return first.reset(); -} +} // namespace ondemand +} // namespace lasx +} // namespace simdjson -inline simdjson_result simdjson_result::is_empty() noexcept { - if (error()) { return error(); } - return first.is_empty(); -} +namespace simdjson { -simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { - if (error()) { return error(); } - return first.count_fields(); -} +simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} -simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { - if (error()) { return error(); } - return first.raw_json(); -} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H -/* end file simdjson/generic/ondemand/object-inl.h for lasx */ -/* including simdjson/generic/ondemand/object_iterator-inl.h for lasx: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/object_iterator-inl.h for lasx */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/token_iterator-inl.h for lasx */ +/* including simdjson/generic/ondemand/value_iterator-inl.h for lasx: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/value_iterator-inl.h for lasx */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { namespace lasx { namespace ondemand { -// -// object_iterator -// - -simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept - : iter{_iter} -{} +simdjson_inline value_iterator::value_iterator( + json_iterator *json_iter, + depth_t depth, + token_position start_position +) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} +{ +} -simdjson_inline simdjson_result object_iterator::operator*() noexcept { - error_code error = iter.error(); - if (error) { iter.abandon(); return error; } - auto result = field::start(iter); - // TODO this is a safety rail ... users should exit loops as soon as they receive an error. - // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. - if (result.error()) { iter.abandon(); } - return result; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_object(); } -simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { - return !(*this != other); + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_root_object(); } -simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { - return iter.is_open(); + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { + assert_at_container_start(); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + if (*_json_iter->peek() == '}') { + logger::log_value(*_json_iter, "empty object"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY(end_container()); + return false; + } + return true; } -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_inline object_iterator &object_iterator::operator++() noexcept { - // TODO this is a safety rail ... users should exit loops as soon as they receive an error. - // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. - if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // object: e.g., `{"a":2} foo }`. Users concerned with garbage content should + // call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != '}') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); + } + // If the last character is } *and* the first gibberish character is also '}' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed object. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} - simdjson_unused error_code error; - if ((error = iter.skip_child() )) { return *this; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { + auto error = check_root_object(); + if(error) { return error; } + return started_object(); +} - simdjson_unused bool has_value; - if ((error = iter.has_next_field().get(has_value) )) { return *this; }; - return *this; +simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { +#if SIMDJSON_CHECK_EOF + if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } + // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + _json_iter->ascend_to(depth()-1); + return SUCCESS; } -SIMDJSON_POP_DISABLE_WARNINGS -// -// ### Live States -// -// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is -// always SUCCESS: -// -// - Start: This is the state when the object is first found and the iterator is just past the {. -// In this state, at_start == true. -// - Next: After we hand a scalar value to the user, or an array/object which they then fully -// iterate over, the iterator is at the , or } before the next value. In this state, -// depth == iter.depth, at_start == false, and error == SUCCESS. -// - Unfinished Business: When we hand an array/object to the user which they do not fully -// iterate over, we need to finish that iteration by skipping child values until we reach the -// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. -// -// ## Error States -// -// In error states, we will yield exactly one more value before stopping. iter.depth == depth -// and at_start is always false. We decrement after yielding the error, moving to the Finished -// state. -// -// - Chained Error: When the object iterator is part of an error chain--for example, in -// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an -// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and -// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. -// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, -// we flag that as an error and treat it exactly the same as a Chained Error. In this state, -// error == TAPE_ERROR, iter.depth == depth, and at_start == false. -// -// Errors that occur while reading a field to give to the user (such as when the key is not a -// string or the field is missing a colon) are yielded immediately. Depth is then decremented, -// moving to the Finished state without transitioning through an Error state at all. -// -// ## Terminal State -// -// The terminal state has iter.depth < depth. at_start is always false. -// -// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. -// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. -// +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { + assert_at_next(); -} // namespace ondemand -} // namespace lasx -} // namespace simdjson + // It's illegal to call this unless there are more tokens: anything that ends in } or ] is + // obligated to verify there are more tokens if they are not the top level. + switch (*_json_iter->return_current_and_advance()) { + case '}': + logger::log_end_value(*_json_iter, "object"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between object fields"); + } +} -namespace simdjson { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { + error_code error; + bool has_value; + // + // Initially, the object can be in one of a few different places: + // + // 1. The start of the object, at the first field: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + if (at_first_field()) { + has_value = true; -simdjson_inline simdjson_result::simdjson_result( - lasx::ondemand::object_iterator &&value -) noexcept - : implementation_simdjson_result_base(std::forward(value)) -{ - first.iter.assert_is_valid(); -} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base({}, error) -{ -} + // + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + return false; -simdjson_inline simdjson_result simdjson_result::operator*() noexcept { - if (error()) { return error(); } - return *first; -} -// If we're iterating and there is an error, return the error once. -simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return !error(); } - return first == other.first; -} -// If we're iterating and there is an error, return the error once. -simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return error(); } - return first != other.first; -} -// Checks for ']' and ',' -simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { - // Clear the error if there is one, so we don't yield it twice - if (error()) { second = SUCCESS; return *this; } - ++first; - return *this; -} + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + if ((error = skip_child() )) { abandon(); return error; } + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + while (has_value) { + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + //if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } -} // namespace simdjson + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); // Skip the value entirely + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } -#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/object_iterator-inl.h for lasx */ -/* including simdjson/generic/ondemand/parser-inl.h for lasx: #include "simdjson/generic/ondemand/parser-inl.h" */ -/* begin file simdjson/generic/ondemand/parser-inl.h for lasx */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H + // If the loop ended, we're out of fields to look at. + return false; +} -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { + /** + * When find_field_unordered_raw is called, we can either be pointing at the + * first key, pointing outside (at the closing brace) or if a key was matched + * we can be either pointing right afterthe ':' right before the value (that we need skip), + * or we may have consumed the value and we might be at a comma or at the + * final brace (ready for a call to has_next_field()). + */ + error_code error; + bool has_value; -namespace simdjson { -namespace lasx { -namespace ondemand { + // First, we scan from that point to the end. + // If we don't find a match, we may loop back around, and scan from the beginning to that point. + token_position search_start = _json_iter->position(); -simdjson_inline parser::parser(size_t max_capacity) noexcept - : _max_capacity{max_capacity} { -} + // We want to know whether we need to go back to the beginning. + bool at_first = at_first_field(); + /////////////// + // Initially, the object can be in one of a few different places: + // + // 1. At the first key: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + // + if (at_first) { + has_value = true; -simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { - if (new_capacity > max_capacity()) { return CAPACITY; } - if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { - // string_capacity copied from document::allocate - _capacity = 0; - size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); - string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); #if SIMDJSON_DEVELOPMENT_CHECKS - start_positions.reset(new (std::nothrow) token_position[new_max_depth]); + // If we're past the end of the object, we're being iterated out of order. + // Note: this is not perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } #endif - if (implementation) { - SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); - SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + SIMDJSON_TRY(reset_object().get(has_value)); + at_first = true; + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // } else { - SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); - } - _capacity = new_capacity; - _max_depth = new_max_depth; - return SUCCESS; -} + // If someone queried a key but they not did access the value, then we are left pointing + // at the ':' and we need to move forward through the value... If the value was + // processed then skip_child() does not move the iterator (but may adjust the depth). + if ((error = skip_child() )) { abandon(); return error; } + search_start = _json_iter->position(); + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } #if SIMDJSON_DEVELOPMENT_CHECKS -simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { - return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); -} + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } #endif + } + + // After initial processing, we will be in one of two states: + // + // ``` + // // At the beginning of a field + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // At the end of the object + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // ``` + // + // Next, we find a match starting from the current position. + while (has_value) { + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + // Performance note: it maybe wasteful to rewind to the beginning when there might be + // no other query following. Indeed, it would require reskipping the whole object. + // Instead, you can just stay where you are. If there is a new query, there is always time + // to rewind. + if(at_first) { return false; } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + // If we reach the end without finding a match, search the rest of the fields starting at the + // beginning of the object. + // (We have already run through the object before, so we've already validated its structure. We + // don't check errors in this bit.) + SIMDJSON_TRY(reset_object().get(has_value)); + while (true) { + SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field - json.remove_utf8_bom(); + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + error = field_value(); SIMDJSON_ASSUME(!error); - // Allocate if needed - if (capacity() < json.length() || !string_buf) { - SIMDJSON_TRY( allocate(json.length(), max_depth()) ); - } + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } - // Run stage 1. - SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); - return document::start({ reinterpret_cast(json.data()), this }); + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // If we reached the end of the key-value pair we started from, then we know + // that the key is not there so we return false. We are either right before + // the next comma or the final brace. + if(_json_iter->position() == search_start) { return false; } + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); + // If we make the mistake of exiting here, then we could be left pointing at a key + // in the middle of an object. That's not an allowable state. + } + // If the loop ended, we're out of fields to look at. The program should + // never reach this point. + return false; } +SIMDJSON_POP_DISABLE_WARNINGS -#ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { + assert_at_next(); - json.remove_utf8_bom(); + const uint8_t *key = _json_iter->return_current_and_advance(); + if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } + return raw_json_string(key); +} - // Allocate if needed - if (capacity() < json.length() || !string_buf) { - SIMDJSON_TRY( allocate(json.length(), max_depth()) ); - } +simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { + assert_at_next(); - // Run stage 1. - const simdjson::error_code err = implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular); - if (err) { - if (err != UNCLOSED_STRING) - return err; - } - return document::start({ reinterpret_cast(json.data()), this, true }); + if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } + _json_iter->descend_to(depth()+1); + return SUCCESS; } -#endif // SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { - return iterate(padded_string_view(json, len, allocated)); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_array(); } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { - return iterate(padded_string_view(json, len, allocated)); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_root_array(); } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { - return iterate(padded_string_view(json, allocated)); +inline std::string value_iterator::to_string() const noexcept { + auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); + if(_json_iter != nullptr) { answer += _json_iter->to_string(); } + answer += std::string(" ]"); + return answer; } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { - if(json.capacity() - json.size() < SIMDJSON_PADDING) { - json.reserve(json.size() + SIMDJSON_PADDING); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { + assert_at_container_start(); + if (*_json_iter->peek() == ']') { + logger::log_value(*_json_iter, "empty array"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY( end_container() ); + return false; } - return iterate(padded_string_view(json)); -} - -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { - return iterate(padded_string_view(json)); + _json_iter->descend_to(depth()+1); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + return true; } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { - // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception - SIMDJSON_TRY( result.error() ); - padded_string_view json = result.value_unsafe(); - return iterate(json); +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should + // also call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != ']') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); + } + // If the last character is ] *and* the first gibberish character is also ']' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed array. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { - // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception - SIMDJSON_TRY( result.error() ); - const padded_string &json = result.value_unsafe(); - return iterate(json); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { + auto error = check_root_array(); + if (error) { return error; } + return started_array(); } -simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } - - json.remove_utf8_bom(); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { + assert_at_next(); - // Allocate if needed - if (capacity() < json.length()) { - SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + logger::log_event(*this, "has_next_element"); + switch (*_json_iter->return_current_and_advance()) { + case ']': + logger::log_end_value(*_json_iter, "array"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + _json_iter->descend_to(depth()+1); + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between array elements"); } +} - // Run stage 1. - SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); - return json_iterator(reinterpret_cast(json.data()), this); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { + auto not_true = atomparsing::str4ncmp(json, "true"); + auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); + bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 5 : 4]); + if (error) { return incorrect_type_error("Not a boolean"); } + return simdjson_result(!not_true); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { + bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); + // if we start with 'n', we must be a null + if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } + return is_null_string; } -inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { - if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } - if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { - buf += 3; - len -= 3; - } - if(allow_comma_separated && batch_size < len) { batch_size = len; } - return document_stream(*this, buf, len, batch_size, allow_comma_separated); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { + return get_raw_json_string().unescape(json_iter(), allow_replacement); } -inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_string(allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; } -inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { + return get_raw_json_string().unescape_wobbly(json_iter()); } -inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + advance_scalar("string"); + return raw_json_string(json+1); } - -simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { - return _capacity; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { + auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; } -simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept { - return _max_capacity; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { + auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; } -simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { - return _max_depth; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { + auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; } - -simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { - if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { - _max_capacity = max_capacity; - } else { - _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; - } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { + auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; } - -simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { - uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); - if (!end) { return STRING_ERROR; } - std::string_view result(reinterpret_cast(dst), end-dst); - dst = end; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { + auto result = numberparsing::parse_double(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } return result; } - -simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { - uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); - if (!end) { return STRING_ERROR; } - std::string_view result(reinterpret_cast(dst), end-dst); - dst = end; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { + auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } return result; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { + auto result = parse_bool(peek_non_root_scalar("bool")); + if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } + return result; +} +simdjson_inline simdjson_result value_iterator::is_null() noexcept { + bool is_null_value; + SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); + if(is_null_value) { advance_non_root_scalar("null"); } + return is_null_value; +} +simdjson_inline bool value_iterator::is_negative() noexcept { + return numberparsing::is_negative(peek_non_root_scalar("numbersign")); +} +simdjson_inline bool value_iterator::is_root_negative() noexcept { + return numberparsing::is_negative(peek_root_scalar("numbersign")); +} +simdjson_inline simdjson_result value_iterator::is_integer() noexcept { + return numberparsing::is_integer(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { + return numberparsing::get_number_type(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number() noexcept { + number num; + error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); + if(error) { return error; } + return num; +} -} // namespace ondemand -} // namespace lasx -} // namespace simdjson - -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::parser &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H -/* end file simdjson/generic/ondemand/parser-inl.h for lasx */ -/* including simdjson/generic/ondemand/raw_json_string-inl.h for lasx: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ -/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for lasx */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { - -namespace lasx { -namespace ondemand { - -simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} - -simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } - +simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("is_root_integer"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + return false; // if there are more than 20 characters, it cannot be represented as an integer. + } + auto answer = numberparsing::is_integer(tmpbuf); + // If the parsing was a success, we must still check that it is + // a single scalar. Note that we parse first because of cases like '[]' where + // getting TRAILING_CONTENT is wrong. + if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } + return answer; +} -simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { - size_t pos{0}; - while(pos < target.size()) { - pos = target.find('"', pos); - if(pos == std::string_view::npos) { return true; } - if(pos != 0 && target[pos-1] != '\\') { return false; } - if(pos > 1 && target[pos-2] == '\\') { - size_t backslash_count{2}; - for(size_t i = 3; i <= pos; i++) { - if(target[pos-i] == '\\') { backslash_count++; } - else { break; } - } - if(backslash_count % 2 == 0) { return false; } +simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return number_type::big_integer; } - pos++; + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; } - return true; + auto answer = numberparsing::get_number_type(tmpbuf); + if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + return answer; } - -simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { - size_t pos{0}; - while(target[pos]) { - const char * result = strchr(target+pos, '"'); - if(result == nullptr) { return true; } - pos = result - target; - if(pos != 0 && target[pos-1] != '\\') { return false; } - if(pos > 1 && target[pos-2] == '\\') { - size_t backslash_count{2}; - for(size_t i = 3; i <= pos; i++) { - if(target[pos-i] == '\\') { backslash_count++; } - else { break; } - } - if(backslash_count % 2 == 0) { return false; } +simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + // NOTE: the current approach doesn't work for very big integer numbers containing more than 1074 digits. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + if(numberparsing::check_if_integer(json, max_len)) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); + return BIGINT_ERROR; } - pos++; + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); + return NUMBER_ERROR; } - return true; + number num; + error_code error = numberparsing::parse_number(tmpbuf, num); + if(error) { return error; } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("number"); + return num; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { + return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); +} +template +simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept { + std::string_view content; + auto err = get_root_string(check_trailing, allow_replacement).get(content); + if (err) { return err; } + receiver = content; + return SUCCESS; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { + return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_integer(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } -simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { - // If we are going to call memcmp, then we must know something about the length of the raw_json_string. - return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + auto result = numberparsing::parse_integer_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; } - -simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { - // Assumptions: does not contain unescaped quote characters("), and - // the raw content is quote terminated within a valid JSON string. - if(target.size() <= SIMDJSON_PADDING) { - return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; } - const char * r{raw()}; - size_t pos{0}; - for(;pos < target.size();pos++) { - if(r[pos] != target[pos]) { return false; } + auto result = numberparsing::parse_double(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); } - if(r[pos] != '"') { return false; } - return true; + return result; } -simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { - const char * r{raw()}; - size_t pos{0}; - bool escaping{false}; - for(;pos < target.size();pos++) { - if(r[pos] != target[pos]) { return false; } - // if target is a compile-time constant and it is free from - // quotes, then the next part could get optimized away through - // inlining. - if((target[pos] == '"') && !escaping) { - // We have reached the end of the raw_json_string but - // the target is not done. - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; - } +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; } - if(r[pos] != '"') { return false; } - return true; + auto result = numberparsing::parse_double_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; } - -simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { - // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and - // the raw content is quote terminated within a valid JSON string. - const char * r{raw()}; - size_t pos{0}; - for(;target[pos];pos++) { - if(r[pos] != target[pos]) { return false; } - } - if(r[pos] != '"') { return false; } - return true; +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("bool"); + // We have a boolean if we have either "true" or "false" and the next character is either + // a structural character or whitespace. We also check that the length is correct: + // "true" and "false" are 4 and 5 characters long, respectively. + bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && + (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + return value_true; } -simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { - // Assumptions: does not contain unescaped quote characters, and - // the raw content is quote terminated within a valid JSON string. - const char * r{raw()}; - size_t pos{0}; - bool escaping{false}; - for(;target[pos];pos++) { - if(r[pos] != target[pos]) { return false; } - // if target is a compile-time constant and it is free from - // quotes, then the next part could get optimized away through - // inlining. - if((target[pos] == '"') && !escaping) { - // We have reached the end of the raw_json_string but - // the target is not done. - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; - } +simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { + auto max_len = peek_root_length(); + auto json = peek_root_scalar("null"); + bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + if(result) { // we have something that looks like a null. + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("null"); + } else if (json[0] == 'n') { + return incorrect_type_error("Not a null but starts with n"); } - if(r[pos] != '"') { return false; } - return true; + return result; } -simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { - return a.unsafe_is_equal(c); -} +simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); -simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { - return a == c; + return _json_iter->skip_child(depth()); } -simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { - return !(a == c); +simdjson_inline value_iterator value_iterator::child() const noexcept { + assert_at_child(); + return { _json_iter, depth()+1, _json_iter->token.position() }; } -simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { - return !(a == c); +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is +// marked non-inline. +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline bool value_iterator::is_open() const noexcept { + return _json_iter->depth() >= depth(); } +SIMDJSON_POP_DISABLE_WARNINGS - -simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { - return iter.unescape(*this, allow_replacement); +simdjson_inline bool value_iterator::at_end() const noexcept { + return _json_iter->at_end(); } -simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { - return iter.unescape_wobbly(*this); +simdjson_inline bool value_iterator::at_start() const noexcept { + return _json_iter->token.position() == start_position(); } -simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { - bool in_escape = false; - const char *s = str.raw(); - while (true) { - switch (*s) { - case '\\': in_escape = !in_escape; break; - case '"': if (in_escape) { in_escape = false; } else { return out; } break; - default: if (in_escape) { in_escape = false; } - } - out << *s; - s++; - } +simdjson_inline bool value_iterator::at_first_field() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + return _json_iter->token.position() == start_position() + 1; } -} // namespace ondemand -} // namespace lasx -} // namespace simdjson - -namespace simdjson { - -simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::raw_json_string &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline void value_iterator::abandon() noexcept { + _json_iter->abandon(); +} -simdjson_inline simdjson_result simdjson_result::raw() const noexcept { - if (error()) { return error(); } - return first.raw(); +simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { + return _depth; } -simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(lasx::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { - if (error()) { return error(); } - return first.unescape(iter, allow_replacement); +simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { + return _json_iter->error; } -simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(lasx::ondemand::json_iterator &iter) const noexcept { - if (error()) { return error(); } - return first.unescape_wobbly(iter); +simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { + return _json_iter->string_buf_loc(); +} +simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { + return *_json_iter; +} +simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { + return *_json_iter; } -} // namespace simdjson - -#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H -/* end file simdjson/generic/ondemand/raw_json_string-inl.h for lasx */ -/* including simdjson/generic/ondemand/serialization-inl.h for lasx: #include "simdjson/generic/ondemand/serialization-inl.h" */ -/* begin file simdjson/generic/ondemand/serialization-inl.h for lasx */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { -inline std::string_view trim(const std::string_view str) noexcept { - // We can almost surely do better by rolling our own find_first_not_of function. - size_t first = str.find_first_not_of(" \t\n\r"); - // If we have the empty string (just white space), then no trimming is possible, and - // we return the empty string_view. - if (std::string_view::npos == first) { return std::string_view(); } - size_t last = str.find_last_not_of(" \t\n\r"); - return str.substr(first, (last - first + 1)); +simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { + return _json_iter->peek(start_position()); +} +simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { + return _json_iter->peek_length(start_position()); +} +simdjson_inline uint32_t value_iterator::peek_root_length() const noexcept { + return _json_iter->peek_root_length(start_position()); } +simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return peek_start(); } -inline simdjson_result to_json_string(lasx::ondemand::document& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + return _json_iter->peek(); } -inline simdjson_result to_json_string(lasx::ondemand::document_reference& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); -} +simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return; } -inline simdjson_result to_json_string(lasx::ondemand::value& x) noexcept { - /** - * If we somehow receive a value that has already been consumed, - * then the following code could be in trouble. E.g., we create - * an array as needed, but if an array was already created, then - * it could be bad. - */ - using namespace lasx::ondemand; - lasx::ondemand::json_type t; - auto error = x.type().get(t); - if(error != SUCCESS) { return error; } - switch (t) - { - case json_type::array: - { - lasx::ondemand::array array; - error = x.get_array().get(array); - if(error) { return error; } - return to_json_string(array); - } - case json_type::object: - { - lasx::ondemand::object object; - error = x.get_object().get(object); - if(error) { return error; } - return to_json_string(object); - } - default: - return trim(x.raw_json_token()); - } + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); } -inline simdjson_result to_json_string(lasx::ondemand::object& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); -} +simdjson_warn_unused simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { + logger::log_start_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + const uint8_t *json; + if (!is_at_start()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + json = peek_start(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + } else { + assert_at_start(); + /** + * We should be prudent. Let us peek. If it is not the right type, we + * return an error. Only once we have determined that we have the right + * type are we allowed to advance! + */ + json = _json_iter->peek(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + _json_iter->return_current_and_advance(); + } -inline simdjson_result to_json_string(lasx::ondemand::array& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); -} -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); + return SUCCESS; } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); -} -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); -} +simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); + assert_at_root(); + return _json_iter->peek(); } +simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); + assert_at_non_root_start(); + return _json_iter->peek(); } -} // namespace simdjson -namespace simdjson { namespace lasx { namespace ondemand { +simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::value x) { - std::string_view v; - auto error = simdjson::to_json_string(x).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::value x) { - std::string_view v; - auto error = simdjson::to_json_string(x).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } + assert_at_root(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); } -#endif +simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::array value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::array value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } + assert_at_non_root_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); } -#endif -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::document& value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::document_reference& value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::document& value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } +simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { + logger::log_error(*_json_iter, start_position(), depth(), message); + return INCORRECT_TYPE; } -#endif -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::object value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::lasx::ondemand::object value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } +simdjson_inline bool value_iterator::is_at_start() const noexcept { + return position() == start_position(); } -#endif -}}} // namespace simdjson::lasx::ondemand -#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H -/* end file simdjson/generic/ondemand/serialization-inl.h for lasx */ -/* including simdjson/generic/ondemand/token_iterator-inl.h for lasx: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/token_iterator-inl.h for lasx */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H - -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - -namespace simdjson { -namespace lasx { -namespace ondemand { +simdjson_inline bool value_iterator::is_at_key() const noexcept { + // Keys are at the same depth as the object. + // Note here that we could be safer and check that we are within an object, + // but we do not. + return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; +} -simdjson_inline token_iterator::token_iterator( - const uint8_t *_buf, - token_position position -) noexcept : buf{_buf}, _position{position} -{ +simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { + // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). + auto delta = position() - start_position(); + return delta == 1 || delta == 2; } -simdjson_inline uint32_t token_iterator::current_offset() const noexcept { - return *(_position); +inline void value_iterator::assert_at_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); } +inline void value_iterator::assert_at_container_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} -simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { - return &buf[*(_position++)]; +inline void value_iterator::assert_at_next() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); } -simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { - return &buf[*position]; +simdjson_inline void value_iterator::move_at_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position); } -simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { - return *position; + +simdjson_inline void value_iterator::move_at_container_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position + 1); } -simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { - return *(position+1) - *position; + +simdjson_inline simdjson_result value_iterator::reset_array() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_array(); } -simdjson_inline uint32_t token_iterator::peek_root_length(token_position position) const noexcept { - return *(position+2) - *(position) > *(position+1) - *(position) ? - *(position+1) - *(position) - : *(position+2) - *(position); +simdjson_inline simdjson_result value_iterator::reset_object() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_object(); } -simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { - return &buf[*(_position+delta)]; + +inline void value_iterator::assert_at_child() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); + SIMDJSON_ASSUME( _depth > 0 ); } -simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { - return *(_position+delta); + +inline void value_iterator::assert_at_root() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth == 1 ); } -simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { - return *(_position+delta+1) - *(_position+delta); + +inline void value_iterator::assert_at_non_root_start() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth > 1 ); } -simdjson_inline token_position token_iterator::position() const noexcept { - return _position; +inline void value_iterator::assert_is_valid() const noexcept { + SIMDJSON_ASSUME( _json_iter != nullptr ); } -simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { - _position = target_position; + +simdjson_inline bool value_iterator::is_valid() const noexcept { + return _json_iter != nullptr; } -simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { - return _position == other._position; +simdjson_inline simdjson_result value_iterator::type() const noexcept { + switch (*peek_start()) { + case '{': + return json_type::object; + case '[': + return json_type::array; + case '"': + return json_type::string; + case 'n': + return json_type::null; + case 't': case 'f': + return json_type::boolean; + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return json_type::number; + default: + return json_type::unknown; + } } -simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { - return _position != other._position; + +simdjson_inline token_position value_iterator::start_position() const noexcept { + return _start_position; } -simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { - return _position > other._position; + +simdjson_inline token_position value_iterator::position() const noexcept { + return _json_iter->position(); } -simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { - return _position >= other._position; + +simdjson_inline token_position value_iterator::end_position() const noexcept { + return _json_iter->end_position(); } -simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { - return _position < other._position; + +simdjson_inline token_position value_iterator::last_position() const noexcept { + return _json_iter->last_position(); } -simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { - return _position <= other._position; + +simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { + return _json_iter->report_error(error, message); } } // namespace ondemand @@ -122228,1156 +151113,3592 @@ simdjson_inline bool token_iterator::operator<=(const token_iterator &other) con namespace simdjson { -simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::token_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} +simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::value_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} } // namespace simdjson -#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/token_iterator-inl.h for lasx */ -/* including simdjson/generic/ondemand/value_iterator-inl.h for lasx: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ -/* begin file simdjson/generic/ondemand/value_iterator-inl.h for lasx */ -#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/value_iterator-inl.h for lasx */ + +// JSON builder inline definitions +/* including simdjson/generic/ondemand/json_string_builder-inl.h for lasx: #include "simdjson/generic/ondemand/json_string_builder-inl.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder-inl.h for lasx */ +/** + * This file is part of the builder API. It is temporarily in the ondemand + * directory but we will move it to a builder directory later. + */ +#include +#include +#include +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_INL_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type-inl.h" */ -/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +/* + * Empirically, we have found that an inlined optimization is important for + * performance. The following macros are not ideal. We should find a better + * way to inline the code. + */ + +#if defined(__SSE2__) || defined(__x86_64__) || defined(__x86_64) || \ + (defined(_M_AMD64) || defined(_M_X64) || \ + (defined(_M_IX86_FP) && _M_IX86_FP == 2)) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#define SIMDJSON_EXPERIMENTAL_HAS_SSE2 1 +#endif +#endif + +#if defined(__aarch64__) || defined(_M_ARM64) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_NEON +#define SIMDJSON_EXPERIMENTAL_HAS_NEON 1 +#endif +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +#include +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#include +#endif + namespace simdjson { namespace lasx { -namespace ondemand { +namespace builder { + +static SIMDJSON_CONSTEXPR_LAMBDA std::array + json_quotable_character = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; -simdjson_inline value_iterator::value_iterator( - json_iterator *json_iter, - depth_t depth, - token_position start_position -) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} -{ -} +/** -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { - SIMDJSON_TRY( start_container('{', "Not an object", "object") ); - return started_object(); +A possible SWAR implementation of has_json_escapable_byte. It is not used +because it is slower than the current implementation. It is kept here for +reference (to show that we tried it). + +inline bool has_json_escapable_byte(uint64_t x) { + uint64_t is_ascii = 0x8080808080808080ULL & ~x; + uint64_t xor2 = x ^ 0x0202020202020202ULL; + uint64_t lt32_or_eq34 = xor2 - 0x2121212121212121ULL; + uint64_t sub92 = x ^ 0x5C5C5C5C5C5C5C5CULL; + uint64_t eq92 = (sub92 - 0x0101010101010101ULL); + return ((lt32_or_eq34 | eq92) & is_ascii) != 0; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { - SIMDJSON_TRY( start_container('{', "Not an object", "object") ); - return started_root_object(); +**/ + +SIMDJSON_CONSTEXPR_LAMBDA simdjson_inline bool +simple_needs_escaping(std::string_view v) { + for (char c : v) { + // a table lookup is faster than a series of comparisons + if (json_quotable_character[static_cast(c)]) { + return true; + } + } + return false; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { - assert_at_container_start(); -#if SIMDJSON_DEVELOPMENT_CHECKS - _json_iter->set_start_position(_depth, start_position()); -#endif - if (*_json_iter->peek() == '}') { - logger::log_value(*_json_iter, "empty object"); - _json_iter->return_current_and_advance(); - end_container(); - return false; +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); } - return true; + size_t i = 0; + uint8x16_t running = vdupq_n_u8(0); + uint8x16_t v34 = vdupq_n_u8(34); + uint8x16_t v92 = vdupq_n_u8(92); + + for (; i + 15 < view.size(); i += 16) { + uint8x16_t word = vld1q_u8((const uint8_t *)view.data() + i); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + if (i < view.size()) { + uint8x16_t word = + vld1q_u8((const uint8_t *)view.data() + view.length() - 16); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + return vmaxvq_u32(vreinterpretq_u32_u8(running)) != 0; +} +#elif SIMDJSON_EXPERIMENTAL_HAS_SSE2 +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); + } + size_t i = 0; + __m128i running = _mm_setzero_si128(); + for (; i + 15 < view.size(); i += 16) { + + __m128i word = + _mm_loadu_si128(reinterpret_cast(view.data() + i)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + if (i < view.size()) { + __m128i word = _mm_loadu_si128( + reinterpret_cast(view.data() + view.length() - 16)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + return _mm_movemask_epi8(running) != 0; +} +#else +simdjson_inline bool fast_needs_escaping(std::string_view view) { + return simple_needs_escaping(view); } +#endif -simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { - // When in streaming mode, we cannot expect peek_last() to be the last structural element of the - // current document. It only works in the normal mode where we have indexed a single document. - // Note that adding a check for 'streaming' is not expensive since we only have at most - // one root element. - if ( ! _json_iter->streaming() ) { - // The following lines do not fully protect against garbage content within the - // object: e.g., `{"a":2} foo }`. Users concerned with garbage content should - // call `at_end()` on the document instance at the end of the processing to - // ensure that the processing has finished at the end. - // - if (*_json_iter->peek_last() != '}') { - _json_iter->abandon(); - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); +SIMDJSON_CONSTEXPR_LAMBDA inline size_t +find_next_json_quotable_character(const std::string_view view, + size_t location) noexcept { + + for (auto pos = view.begin() + location; pos != view.end(); ++pos) { + if (json_quotable_character[static_cast(*pos)]) { + return pos - view.begin(); } - // If the last character is } *and* the first gibberish character is also '}' - // then on-demand could accidentally go over. So we need additional checks. - // https://github.com/simdjson/simdjson/issues/1834 - // Checking that the document is balanced requires a full scan which is potentially - // expensive, but it only happens in edge cases where the first padding character is - // a closing bracket. - if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { - _json_iter->abandon(); - // The exact error would require more work. It will typically be an unclosed object. - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + return size_t(view.size()); +} + +SIMDJSON_CONSTEXPR_LAMBDA static std::string_view control_chars[] = { + "\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", "\\u0006", + "\\u0007", "\\b", "\\t", "\\n", "\\u000b", "\\f", "\\r", + "\\u000e", "\\u000f", "\\u0010", "\\u0011", "\\u0012", "\\u0013", "\\u0014", + "\\u0015", "\\u0016", "\\u0017", "\\u0018", "\\u0019", "\\u001a", "\\u001b", + "\\u001c", "\\u001d", "\\u001e", "\\u001f"}; + +// All Unicode characters may be placed within the quotation marks, except for +// the characters that MUST be escaped: quotation mark, reverse solidus, and the +// control characters (U+0000 through U+001F). There are two-character sequence +// escape representations of some popular characters: +// \", \\, \b, \f, \n, \r, \t. +SIMDJSON_CONSTEXPR_LAMBDA void escape_json_char(char c, char *&out) { + if (c == '"') { + memcpy(out, "\\\"", 2); + out += 2; + } else if (c == '\\') { + memcpy(out, "\\\\", 2); + out += 2; + } else { + std::string_view v = control_chars[uint8_t(c)]; + memcpy(out, v.data(), v.size()); + out += v.size(); + } +} + +inline size_t write_string_escaped(const std::string_view input, char *out) { + size_t mysize = input.size(); + if (!fast_needs_escaping(input)) { // fast path! + memcpy(out, input.data(), input.size()); + return input.size(); + } + const char *const initout = out; + size_t location = find_next_json_quotable_character(input, 0); + memcpy(out, input.data(), location); + out += location; + escape_json_char(input[location], out); + location += 1; + while (location < mysize) { + size_t newlocation = find_next_json_quotable_character(input, location); + memcpy(out, input.data() + location, newlocation - location); + out += newlocation - location; + location = newlocation; + if (location == mysize) { + break; } + escape_json_char(input[location], out); + location += 1; } - return SUCCESS; + return out - initout; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { - auto error = check_root_object(); - if(error) { return error; } - return started_object(); +simdjson_inline string_builder::string_builder(size_t initial_capacity) + : buffer(new(std::nothrow) char[initial_capacity]), position(0), + capacity(buffer.get() != nullptr ? initial_capacity : 0), + is_valid(buffer.get() != nullptr) {} + +simdjson_inline bool string_builder::capacity_check(size_t upcoming_bytes) { + // We use the convention that when is_valid is false, then the capacity and + // the position are 0. + // Most of the time, this function will return true. + if (simdjson_likely(upcoming_bytes <= capacity - position)) { + return true; + } + // check for overflow, most of the time there is no overflow + if (simdjson_likely(position + upcoming_bytes < position)) { + return false; + } + // We will rarely get here. + grow_buffer((std::max)(capacity * 2, position + upcoming_bytes)); + // If the buffer allocation failed, we set is_valid to false. + return is_valid; } -simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { -#if SIMDJSON_CHECK_EOF - if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } - // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } -#endif // SIMDJSON_CHECK_EOF - _json_iter->ascend_to(depth()-1); - return SUCCESS; +simdjson_inline void string_builder::grow_buffer(size_t desired_capacity) { + if (!is_valid) { + return; + } + std::unique_ptr new_buffer(new (std::nothrow) char[desired_capacity]); + if (new_buffer.get() == nullptr) { + set_valid(false); + return; + } + std::memcpy(new_buffer.get(), buffer.get(), position); + buffer.swap(new_buffer); + capacity = desired_capacity; } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { - assert_at_next(); +simdjson_inline void string_builder::set_valid(bool valid) noexcept { + if (!valid) { + is_valid = false; + capacity = 0; + position = 0; + buffer.reset(); + } else { + is_valid = true; + } +} - // It's illegal to call this unless there are more tokens: anything that ends in } or ] is - // obligated to verify there are more tokens if they are not the top level. - switch (*_json_iter->return_current_and_advance()) { - case '}': - logger::log_end_value(*_json_iter, "object"); - SIMDJSON_TRY( end_container() ); - return false; - case ',': - return true; - default: - return report_error(TAPE_ERROR, "Missing comma between object fields"); +simdjson_inline size_t string_builder::size() const noexcept { + return position; +} + +simdjson_inline void string_builder::append(char c) noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = c; } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { - error_code error; - bool has_value; - // - // Initially, the object can be in one of a few different places: - // - // 1. The start of the object, at the first field: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2, index 1) - // ``` - if (at_first_field()) { - has_value = true; +simdjson_inline void string_builder::append_null() noexcept { + constexpr char null_literal[] = "null"; + constexpr size_t null_len = sizeof(null_literal) - 1; + if (capacity_check(null_len)) { + std::memcpy(buffer.get() + position, null_literal, null_len); + position += null_len; + } +} - // - // 2. When a previous search did not yield a value or the object is empty: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 0) - // { } - // ^ (depth 0, index 2) - // ``` - // - } else if (!is_open()) { -#if SIMDJSON_DEVELOPMENT_CHECKS - // If we're past the end of the object, we're being iterated out of order. - // Note: this is not perfect detection. It's possible the user is inside some other object; if so, - // this object iterator will blithely scan that object for fields. - if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } -#endif - return false; +simdjson_inline void string_builder::clear() noexcept { + position = 0; + // if it was invalid, we should try to repair it + if (!is_valid) { + capacity = 0; + buffer.reset(); + is_valid = true; + } +} - // 3. When a previous search found a field or an iterator yielded a value: - // - // ``` - // // When a field was not fully consumed (or not even touched at all) - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2) - // // When a field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // // When the last field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // ``` - // - } else { - if ((error = skip_child() )) { abandon(); return error; } - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } -#if SIMDJSON_DEVELOPMENT_CHECKS - if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } -#endif +namespace internal { + +template ::value>::type> +simdjson_really_inline int int_log2(number_type x) { + return 63 - leading_zeroes(uint64_t(x) | 1); +} + +simdjson_really_inline int fast_digit_count_32(uint32_t x) { + static uint64_t table[] = { + 4294967296, 8589934582, 8589934582, 8589934582, 12884901788, + 12884901788, 12884901788, 17179868184, 17179868184, 17179868184, + 21474826480, 21474826480, 21474826480, 21474826480, 25769703776, + 25769703776, 25769703776, 30063771072, 30063771072, 30063771072, + 34349738368, 34349738368, 34349738368, 34349738368, 38554705664, + 38554705664, 38554705664, 41949672960, 41949672960, 41949672960, + 42949672960, 42949672960}; + return uint32_t((x + table[int_log2(x)]) >> 32); +} + +simdjson_really_inline int fast_digit_count_64(uint64_t x) { + static uint64_t table[] = {9, + 99, + 999, + 9999, + 99999, + 999999, + 9999999, + 99999999, + 999999999, + 9999999999, + 99999999999, + 999999999999, + 9999999999999, + 99999999999999, + 999999999999999ULL, + 9999999999999999ULL, + 99999999999999999ULL, + 999999999999999999ULL, + 9999999999999999999ULL}; + int y = (19 * int_log2(x) >> 6); + y += x > table[y]; + return y + 1; +} + +template ::value>::type> +simdjson_really_inline size_t digit_count(number_type v) noexcept { + static_assert(sizeof(number_type) == 8 || sizeof(number_type) == 4 || + sizeof(number_type) == 2 || sizeof(number_type) == 1, + "We only support 8-bit, 16-bit, 32-bit and 64-bit numbers"); + SIMDJSON_IF_CONSTEXPR(sizeof(number_type) <= 4) { + return fast_digit_count_32(static_cast(v)); + } + else { + return fast_digit_count_64(static_cast(v)); + } +} +static const char decimal_table[200] = { + 0x30, 0x30, 0x30, 0x31, 0x30, 0x32, 0x30, 0x33, 0x30, 0x34, 0x30, 0x35, + 0x30, 0x36, 0x30, 0x37, 0x30, 0x38, 0x30, 0x39, 0x31, 0x30, 0x31, 0x31, + 0x31, 0x32, 0x31, 0x33, 0x31, 0x34, 0x31, 0x35, 0x31, 0x36, 0x31, 0x37, + 0x31, 0x38, 0x31, 0x39, 0x32, 0x30, 0x32, 0x31, 0x32, 0x32, 0x32, 0x33, + 0x32, 0x34, 0x32, 0x35, 0x32, 0x36, 0x32, 0x37, 0x32, 0x38, 0x32, 0x39, + 0x33, 0x30, 0x33, 0x31, 0x33, 0x32, 0x33, 0x33, 0x33, 0x34, 0x33, 0x35, + 0x33, 0x36, 0x33, 0x37, 0x33, 0x38, 0x33, 0x39, 0x34, 0x30, 0x34, 0x31, + 0x34, 0x32, 0x34, 0x33, 0x34, 0x34, 0x34, 0x35, 0x34, 0x36, 0x34, 0x37, + 0x34, 0x38, 0x34, 0x39, 0x35, 0x30, 0x35, 0x31, 0x35, 0x32, 0x35, 0x33, + 0x35, 0x34, 0x35, 0x35, 0x35, 0x36, 0x35, 0x37, 0x35, 0x38, 0x35, 0x39, + 0x36, 0x30, 0x36, 0x31, 0x36, 0x32, 0x36, 0x33, 0x36, 0x34, 0x36, 0x35, + 0x36, 0x36, 0x36, 0x37, 0x36, 0x38, 0x36, 0x39, 0x37, 0x30, 0x37, 0x31, + 0x37, 0x32, 0x37, 0x33, 0x37, 0x34, 0x37, 0x35, 0x37, 0x36, 0x37, 0x37, + 0x37, 0x38, 0x37, 0x39, 0x38, 0x30, 0x38, 0x31, 0x38, 0x32, 0x38, 0x33, + 0x38, 0x34, 0x38, 0x35, 0x38, 0x36, 0x38, 0x37, 0x38, 0x38, 0x38, 0x39, + 0x39, 0x30, 0x39, 0x31, 0x39, 0x32, 0x39, 0x33, 0x39, 0x34, 0x39, 0x35, + 0x39, 0x36, 0x39, 0x37, 0x39, 0x38, 0x39, 0x39, +}; +} // namespace internal + +template +simdjson_inline void string_builder::append(number_type v) noexcept { + static_assert(std::is_same::value || + std::is_integral::value || + std::is_floating_point::value, + "Unsupported number type"); + // If C++17 is available, we can 'if constexpr' here. + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + if (v) { + constexpr char true_literal[] = "true"; + constexpr size_t true_len = sizeof(true_literal) - 1; + if (capacity_check(true_len)) { + std::memcpy(buffer.get() + position, true_literal, true_len); + position += true_len; + } + } else { + constexpr char false_literal[] = "false"; + constexpr size_t false_len = sizeof(false_literal) - 1; + if (capacity_check(false_len)) { + std::memcpy(buffer.get() + position, false_literal, false_len); + position += false_len; + } + } } - while (has_value) { - // Get the key and colon, stopping at the value. - raw_json_string actual_key; - // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes - // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. - // field_key() advances the pointer and checks that '"' is found (corresponding to a key). - // The depth is left unchanged by field_key(). - if ((error = field_key().get(actual_key) )) { abandon(); return error; }; - // field_value() will advance and check that we find a ':' separating the - // key and the value. It will also increment the depth by one. - if ((error = field_value() )) { abandon(); return error; } - // If it matches, stop and return - // We could do it this way if we wanted to allow arbitrary - // key content (including escaped quotes). - //if (actual_key.unsafe_is_equal(max_key_length, key)) { - // Instead we do the following which may trigger buffer overruns if the - // user provides an adversarial key (containing a well placed unescaped quote - // character and being longer than the number of bytes remaining in the JSON - // input). - if (actual_key.unsafe_is_equal(key)) { - logger::log_event(*this, "match", key, -2); - // If we return here, then we return while pointing at the ':' that we just checked. - return true; + else SIMDJSON_IF_CONSTEXPR(std::is_unsigned::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + unsigned_type pv = static_cast(v); + size_t dc = internal::digit_count(pv); + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100) * 2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_integral::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + bool negative = v < 0; + unsigned_type pv = static_cast(v); + if (negative) { + pv = 0 - pv; // the 0 is for Microsoft + } + size_t dc = internal::digit_count(pv); + // by always writing the minus sign, we avoid the branch. + buffer.get()[position] = '-'; + position += negative ? 1 : 0; + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100) * 2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_floating_point::value) { + constexpr size_t max_number_size = 24; + if (capacity_check(max_number_size)) { + // We could specialize for float. + char *end = simdjson::internal::to_chars(buffer.get() + position, nullptr, + double(v)); + position = end - buffer.get(); } + } +} - // No match: skip the value and see if , or } is next - logger::log_event(*this, "no match", key, -2); - // The call to skip_child is meant to skip over the value corresponding to the key. - // After skip_child(), we are right before the next comma (',') or the final brace ('}'). - SIMDJSON_TRY( skip_child() ); // Skip the value entirely - // The has_next_field() advances the pointer and check that either ',' or '}' is found. - // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, - // then we are in error and we abort. - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +simdjson_inline void +string_builder::escape_and_append(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(6 * input.size())) { + position += write_string_escaped(input, buffer.get() + position); } +} - // If the loop ended, we're out of fields to look at. - return false; +simdjson_inline void +string_builder::escape_and_append_with_quotes(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * input.size())) { + buffer.get()[position++] = '"'; + position += write_string_escaped(input, buffer.get() + position); + buffer.get()[position++] = '"'; + } } -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { - /** - * When find_field_unordered_raw is called, we can either be pointing at the - * first key, pointing outside (at the closing brace) or if a key was matched - * we can be either pointing right afterthe ':' right before the value (that we need skip), - * or we may have consumed the value and we might be at a comma or at the - * final brace (ready for a call to has_next_field()). - */ - error_code error; - bool has_value; +simdjson_inline void +string_builder::escape_and_append_with_quotes(char input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * 1)) { + buffer.get()[position++] = '"'; + std::string_view cinput(&input, 1); + position += write_string_escaped(cinput, buffer.get() + position); + buffer.get()[position++] = '"'; + } +} - // First, we scan from that point to the end. - // If we don't find a match, we may loop back around, and scan from the beginning to that point. - token_position search_start = _json_iter->position(); +simdjson_inline void +string_builder::escape_and_append_with_quotes(const char *input) noexcept { + std::string_view cinput(input); + escape_and_append_with_quotes(cinput); +} +#if SIMDJSON_SUPPORTS_CONCEPTS +template +simdjson_inline void string_builder::escape_and_append_with_quotes() noexcept { + escape_and_append_with_quotes(constevalutil::string_constant::value); +} +#endif - // We want to know whether we need to go back to the beginning. - bool at_first = at_first_field(); - /////////////// - // Initially, the object can be in one of a few different places: - // - // 1. At the first key: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2, index 1) - // ``` - // - if (at_first) { - has_value = true; +simdjson_inline void string_builder::append_raw(const char *c) noexcept { + size_t len = std::strlen(c); + append_raw(c, len); +} - // 2. When a previous search did not yield a value or the object is empty: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 0) - // { } - // ^ (depth 0, index 2) - // ``` - // - } else if (!is_open()) { +simdjson_inline void +string_builder::append_raw(std::string_view input) noexcept { + if (capacity_check(input.size())) { + std::memcpy(buffer.get() + position, input.data(), input.size()); + position += input.size(); + } +} -#if SIMDJSON_DEVELOPMENT_CHECKS - // If we're past the end of the object, we're being iterated out of order. - // Note: this is not perfect detection. It's possible the user is inside some other object; if so, - // this object iterator will blithely scan that object for fields. - if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } -#endif - SIMDJSON_TRY(reset_object().get(has_value)); - at_first = true; - // 3. When a previous search found a field or an iterator yielded a value: - // - // ``` - // // When a field was not fully consumed (or not even touched at all) - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2) - // // When a field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // // When the last field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // ``` - // +simdjson_inline void string_builder::append_raw(const char *str, + size_t len) noexcept { + if (capacity_check(len)) { + std::memcpy(buffer.get() + position, str, len); + position += len; + } +} +#if SIMDJSON_SUPPORTS_CONCEPTS +// Support for optional types (std::optional, etc.) +template + requires(!require_custom_serialization) +simdjson_inline void string_builder::append(const T &opt) { + if (opt) { + append(*opt); } else { - // If someone queried a key but they not did access the value, then we are left pointing - // at the ':' and we need to move forward through the value... If the value was - // processed then skip_child() does not move the iterator (but may adjust the depth). - if ((error = skip_child() )) { abandon(); return error; } - search_start = _json_iter->position(); - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } -#if SIMDJSON_DEVELOPMENT_CHECKS - if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } -#endif + append_null(); } +} - // After initial processing, we will be in one of two states: - // - // ``` - // // At the beginning of a field - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // // At the end of the object - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 0) - // ``` - // - // Next, we find a match starting from the current position. - while (has_value) { - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field - - // Get the key and colon, stopping at the value. - raw_json_string actual_key; - // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes - // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. - // field_key() advances the pointer and checks that '"' is found (corresponding to a key). - // The depth is left unchanged by field_key(). - if ((error = field_key().get(actual_key) )) { abandon(); return error; }; - // field_value() will advance and check that we find a ':' separating the - // key and the value. It will also increment the depth by one. - if ((error = field_value() )) { abandon(); return error; } +template + requires(require_custom_serialization) +simdjson_inline void string_builder::append(T &&val) { + serialize(*this, std::forward(val)); +} - // If it matches, stop and return - // We could do it this way if we wanted to allow arbitrary - // key content (including escaped quotes). - // if (actual_key.unsafe_is_equal(max_key_length, key)) { - // Instead we do the following which may trigger buffer overruns if the - // user provides an adversarial key (containing a well placed unescaped quote - // character and being longer than the number of bytes remaining in the JSON - // input). - if (actual_key.unsafe_is_equal(key)) { - logger::log_event(*this, "match", key, -2); - // If we return here, then we return while pointing at the ':' that we just checked. - return true; +template + requires(std::is_convertible::value || + std::is_same::value) +simdjson_inline void string_builder::append(const T &value) { + escape_and_append_with_quotes(value); +} +#endif + +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS +// Support for range-based appending (std::ranges::view, etc.) +template + requires(!std::is_convertible::value && !require_custom_serialization) +simdjson_inline void string_builder::append(const R &range) noexcept { + auto it = std::ranges::begin(range); + auto end = std::ranges::end(range); + if constexpr (concepts::is_pair>) { + start_object(); + + if (it == end) { + end_object(); + return; // Handle empty range } + // Append first item without leading comma + append_key_value(it->first, it->second); + ++it; - // No match: skip the value and see if , or } is next - logger::log_event(*this, "no match", key, -2); - // The call to skip_child is meant to skip over the value corresponding to the key. - // After skip_child(), we are right before the next comma (',') or the final brace ('}'). - SIMDJSON_TRY( skip_child() ); - // The has_next_field() advances the pointer and check that either ',' or '}' is found. - // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, - // then we are in error and we abort. - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append_key_value(it->first, it->second); + } + end_object(); + } else { + start_array(); + if (it == end) { + end_array(); + return; // Handle empty range + } + + // Append first item without leading comma + append(*it); + ++it; + + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append(*it); + } + end_array(); } - // Performance note: it maybe wasteful to rewind to the beginning when there might be - // no other query following. Indeed, it would require reskipping the whole object. - // Instead, you can just stay where you are. If there is a new query, there is always time - // to rewind. - if(at_first) { return false; } +} - // If we reach the end without finding a match, search the rest of the fields starting at the - // beginning of the object. - // (We have already run through the object before, so we've already validated its structure. We - // don't check errors in this bit.) - SIMDJSON_TRY(reset_object().get(has_value)); - while (true) { - SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field +#endif - // Get the key and colon, stopping at the value. - raw_json_string actual_key; - // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes - // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. - // field_key() advances the pointer and checks that '"' is found (corresponding to a key). - // The depth is left unchanged by field_key(). - error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); - // field_value() will advance and check that we find a ':' separating the - // key and the value. It will also increment the depth by one. - error = field_value(); SIMDJSON_ASSUME(!error); +#if SIMDJSON_EXCEPTIONS +simdjson_inline string_builder::operator std::string() const noexcept(false) { + return std::string(operator std::string_view()); +} - // If it matches, stop and return - // We could do it this way if we wanted to allow arbitrary - // key content (including escaped quotes). - // if (actual_key.unsafe_is_equal(max_key_length, key)) { - // Instead we do the following which may trigger buffer overruns if the - // user provides an adversarial key (containing a well placed unescaped quote - // character and being longer than the number of bytes remaining in the JSON - // input). - if (actual_key.unsafe_is_equal(key)) { - logger::log_event(*this, "match", key, -2); - // If we return here, then we return while pointing at the ':' that we just checked. - return true; - } +simdjson_inline string_builder::operator std::string_view() const + noexcept(false) simdjson_lifetime_bound { + return view(); +} +#endif - // No match: skip the value and see if , or } is next - logger::log_event(*this, "no match", key, -2); - // The call to skip_child is meant to skip over the value corresponding to the key. - // After skip_child(), we are right before the next comma (',') or the final brace ('}'). - SIMDJSON_TRY( skip_child() ); - // If we reached the end of the key-value pair we started from, then we know - // that the key is not there so we return false. We are either right before - // the next comma or the final brace. - if(_json_iter->position() == search_start) { return false; } - // The has_next_field() advances the pointer and check that either ',' or '}' is found. - // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, - // then we are in error and we abort. - error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); - // If we make the mistake of exiting here, then we could be left pointing at a key - // in the middle of an object. That's not an allowable state. +simdjson_inline simdjson_result +string_builder::view() const noexcept { + if (!is_valid) { + return simdjson::OUT_OF_CAPACITY; } - // If the loop ended, we're out of fields to look at. The program should - // never reach this point. - return false; + return std::string_view(buffer.get(), position); } -SIMDJSON_POP_DISABLE_WARNINGS -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { - assert_at_next(); +simdjson_inline simdjson_result string_builder::c_str() noexcept { + if (capacity_check(1)) { + buffer.get()[position] = '\0'; + return buffer.get(); + } + return simdjson::OUT_OF_CAPACITY; +} - const uint8_t *key = _json_iter->return_current_and_advance(); - if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } - return raw_json_string(key); +simdjson_inline bool string_builder::validate_unicode() const noexcept { + return simdjson::validate_utf8(buffer.get(), position); } -simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { - assert_at_next(); +simdjson_inline void string_builder::start_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '{'; + } +} - if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } - _json_iter->descend_to(depth()+1); - return SUCCESS; +simdjson_inline void string_builder::end_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '}'; + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { - SIMDJSON_TRY( start_container('[', "Not an array", "array") ); - return started_array(); +simdjson_inline void string_builder::start_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '['; + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { - SIMDJSON_TRY( start_container('[', "Not an array", "array") ); - return started_root_array(); +simdjson_inline void string_builder::end_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ']'; + } } -inline std::string value_iterator::to_string() const noexcept { - auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); - if(_json_iter != nullptr) { answer += _json_iter->to_string(); } - answer += std::string(" ]"); - return answer; +simdjson_inline void string_builder::append_comma() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ','; + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { - assert_at_container_start(); - if (*_json_iter->peek() == ']') { - logger::log_value(*_json_iter, "empty array"); - _json_iter->return_current_and_advance(); - SIMDJSON_TRY( end_container() ); - return false; +simdjson_inline void string_builder::append_colon() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ':'; } - _json_iter->descend_to(depth()+1); -#if SIMDJSON_DEVELOPMENT_CHECKS - _json_iter->set_start_position(_depth, start_position()); +} + +template +simdjson_inline void +string_builder::append_key_value(key_type key, value_type value) noexcept { + static_assert(std::is_same::value || + std::is_convertible::value, + "Unsupported key type"); + escape_and_append_with_quotes(key); + append_colon(); + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + append_null(); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR( + std::is_convertible::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else { + append(value); + } +} + +#if SIMDJSON_SUPPORTS_CONCEPTS +template +simdjson_inline void +string_builder::append_key_value(value_type value) noexcept { + escape_and_append_with_quotes(); + append_colon(); + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + append_null(); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR( + std::is_convertible::value) { + escape_and_append_with_quotes(value); + } + else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } + else { + append(value); + } +} #endif - return true; + +} // namespace builder +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_STRING_BUILDER_INL_H +/* end file simdjson/generic/ondemand/json_string_builder-inl.h for lasx */ +/* including simdjson/generic/ondemand/json_builder.h for lasx: #include "simdjson/generic/ondemand/json_builder.h" */ +/* begin file simdjson/generic/ondemand/json_builder.h for lasx */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/concepts.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#if SIMDJSON_STATIC_REFLECTION + +#include +#include +#include +#include +#include +#include +#include +#include +// #include // for std::define_static_string - header not available yet + +namespace simdjson { +namespace lasx { +namespace builder { + +template + requires(concepts::container_but_not_string && !require_custom_serialization) +constexpr void atom(string_builder &b, const T &t) { + auto it = t.begin(); + auto end = t.end(); + if (it == end) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, *it); + ++it; + for (; it != end; ++it) { + b.append(','); + atom(b, *it); + } + b.append(']'); } -simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { - // When in streaming mode, we cannot expect peek_last() to be the last structural element of the - // current document. It only works in the normal mode where we have indexed a single document. - // Note that adding a check for 'streaming' is not expensive since we only have at most - // one root element. - if ( ! _json_iter->streaming() ) { - // The following lines do not fully protect against garbage content within the - // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should - // also call `at_end()` on the document instance at the end of the processing to - // ensure that the processing has finished at the end. - // - if (*_json_iter->peek_last() != ']') { - _json_iter->abandon(); - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); - } - // If the last character is ] *and* the first gibberish character is also ']' - // then on-demand could accidentally go over. So we need additional checks. - // https://github.com/simdjson/simdjson/issues/1834 - // Checking that the document is balanced requires a full scan which is potentially - // expensive, but it only happens in edge cases where the first padding character is - // a closing bracket. - if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { - _json_iter->abandon(); - // The exact error would require more work. It will typically be an unclosed array. - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +constexpr void atom(string_builder &b, const T &t) { + b.escape_and_append_with_quotes(t); +} + +template + requires(!require_custom_serialization) +constexpr void atom(string_builder &b, const T &m) { + if (m.empty()) { + b.append_raw("{}"); + return; + } + b.append('{'); + bool first = true; + for (const auto& [key, value] : m) { + if (!first) { + b.append(','); } + first = false; + // Keys must be convertible to string_view per the concept + b.escape_and_append_with_quotes(key); + b.append(':'); + atom(b, value); } - return SUCCESS; + b.append('}'); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { - auto error = check_root_array(); - if (error) { return error; } - return started_array(); + +template::value && !std::is_same_v>::type> +constexpr void atom(string_builder &b, const number_type t) { + b.append(t); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { - assert_at_next(); +template + requires(std::is_class_v && !concepts::container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && !require_custom_serialization) +constexpr void atom(string_builder &b, const T &t) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, t.[:dm:]); + i++; + }; + b.append('}'); +} - logger::log_event(*this, "has_next_element"); - switch (*_json_iter->return_current_and_advance()) { - case ']': - logger::log_end_value(*_json_iter, "array"); - SIMDJSON_TRY( end_container() ); - return false; - case ',': - _json_iter->descend_to(depth()+1); - return true; - default: - return report_error(TAPE_ERROR, "Missing comma between array elements"); +// Support for optional types (std::optional, etc.) +template + requires(!require_custom_serialization) +constexpr void atom(string_builder &b, const T &opt) { + if (opt) { + atom(b, opt.value()); + } else { + b.append_raw("null"); } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { - auto not_true = atomparsing::str4ncmp(json, "true"); - auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); - bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 5 : 4]); - if (error) { return incorrect_type_error("Not a boolean"); } - return simdjson_result(!not_true); +// Support for smart pointers (std::unique_ptr, std::shared_ptr, etc.) +template + requires(!require_custom_serialization) +constexpr void atom(string_builder &b, const T &ptr) { + if (ptr) { + atom(b, *ptr); + } else { + b.append_raw("null"); + } } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { - bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); - // if we start with 'n', we must be a null - if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } - return is_null_string; + +// Support for enums - serialize as string representation using expand approach from P2996R12 +template + requires(std::is_enum_v && !require_custom_serialization) +void atom(string_builder &b, const T &e) { +#if SIMDJSON_STATIC_REFLECTION + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + constexpr auto enum_str = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(enum_val))); + if (e == [:enum_val:]) { + b.append_raw(enum_str); + return; + } + }; + // Fallback to integer if enum value not found + atom(b, static_cast>(e)); +#else + // Fallback: serialize as integer if reflection not available + atom(b, static_cast>(e)); +#endif } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { - return get_raw_json_string().unescape(json_iter(), allow_replacement); +// Support for appendable containers that don't have operator[] (sets, etc.) +template + requires(!concepts::container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v && !require_custom_serialization) +constexpr void atom(string_builder &b, const T &container) { + if (container.empty()) { + b.append_raw("[]"); + return; + } + b.append('['); + bool first = true; + for (const auto& item : container) { + if (!first) { + b.append(','); + } + first = false; + atom(b, item); + } + b.append(']'); } -template -simdjson_warn_unused simdjson_inline error_code value_iterator::get_string(string_type& receiver, bool allow_replacement) noexcept { - std::string_view content; - auto err = get_string(allow_replacement).get(content); - if (err) { return err; } - receiver = content; - return SUCCESS; + +// append functions that delegate to atom functions for primitive types +template + requires(std::is_arithmetic_v && !std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { - return get_raw_json_string().unescape_wobbly(json_iter()); + +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { - auto json = peek_scalar("string"); - if (*json != '"') { return incorrect_type_error("Not a string"); } - advance_scalar("string"); - return raw_json_string(json+1); + +template + requires(!require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { - auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } - return result; + +template + requires(!require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { - auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } - return result; + +template + requires(!concepts::container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v && !require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { - auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } - return result; + +template + requires(!require_custom_serialization) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +// works for struct +template + requires(std::is_class_v && !concepts::container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && !require_custom_serialization) +void append(string_builder &b, const Z &z) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^Z, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, z.[:dm:]); + i++; + }; + b.append('}'); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { - auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } - return result; + +// works for container that have begin() and end() iterators +template + requires(concepts::container_but_not_string && !require_custom_serialization) +void append(string_builder &b, const Z &z) { + auto it = z.begin(); + auto end = z.end(); + if (it == end) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, *it); + ++it; + for (; it != end; ++it) { + b.append(','); + atom(b, *it); + } + b.append(']'); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { - auto result = numberparsing::parse_double(peek_non_root_scalar("double")); - if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } - return result; + +template + requires (require_custom_serialization) +void append(string_builder &b, const Z &z) { + b.append(z); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { - auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); - if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } - return result; + + +template +simdjson_warn_unused simdjson_result to_json_string(const Z &z, size_t initial_capacity = string_builder::DEFAULT_INITIAL_CAPACITY) { + string_builder b(initial_capacity); + append(b, z); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { - auto result = parse_bool(peek_non_root_scalar("bool")); - if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } - return result; + +template +simdjson_warn_unused simdjson_error to_json(const Z &z, std::string &s, size_t initial_capacity = string_builder::DEFAULT_INITIAL_CAPACITY) { + string_builder b(initial_capacity); + append(b, z); + std::string_view view; + if(auto e = b.view().get(view); e) { return e; } + s.assign(view); + return SUCCESS; } -simdjson_inline simdjson_result value_iterator::is_null() noexcept { - bool is_null_value; - SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); - if(is_null_value) { advance_non_root_scalar("null"); } - return is_null_value; + +template +string_builder& operator<<(string_builder& b, const Z& z) { + append(b, z); + return b; } -simdjson_inline bool value_iterator::is_negative() noexcept { - return numberparsing::is_negative(peek_non_root_scalar("numbersign")); + +// extract_from: Serialize only specific fields from a struct to JSON +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +void extract_from(string_builder &b, const T &obj) { + // Helper to check if a field name matches any of the requested fields + auto should_extract = [](std::string_view field_name) constexpr -> bool { + return ((FieldNames.view() == field_name) || ...); + }; + + b.append('{'); + bool first = true; + + // Iterate through all members of T using reflection + template for (constexpr auto mem : std::define_static_array( + std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + + if constexpr (std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + + // Only serialize this field if it's in our list of requested fields + if constexpr (should_extract(key)) { + if (!first) { + b.append(','); + } + first = false; + + // Serialize the key + constexpr auto quoted_key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(mem))); + b.append_raw(quoted_key); + b.append(':'); + + // Serialize the value + atom(b, obj.[:mem:]); + } + } + }; + + b.append('}'); } -simdjson_inline bool value_iterator::is_root_negative() noexcept { - return numberparsing::is_negative(peek_root_scalar("numbersign")); + +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_result extract_from(const T &obj, size_t initial_capacity = string_builder::DEFAULT_INITIAL_CAPACITY) { + string_builder b(initial_capacity); + extract_from(b, obj); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); } -simdjson_inline simdjson_result value_iterator::is_integer() noexcept { - return numberparsing::is_integer(peek_non_root_scalar("integer")); + +} // namespace builder +} // namespace lasx +// Alias the function template to 'to' in the global namespace +template +simdjson_warn_unused simdjson_result to_json(const Z &z, size_t initial_capacity = lasx::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + lasx::builder::string_builder b(initial_capacity); + lasx::builder::append(b, z); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); +} +template +simdjson_warn_unused simdjson_error to_json(const Z &z, std::string &s, size_t initial_capacity = lasx::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + lasx::builder::string_builder b(initial_capacity); + lasx::builder::append(b, z); + std::string_view view; + if(auto e = b.view().get(view); e) { return e; } + s.assign(view); + return SUCCESS; } -simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { - return numberparsing::get_number_type(peek_non_root_scalar("integer")); +// Global namespace function for extract_from +template + requires(std::is_class_v && (sizeof...(FieldNames) > 0)) +simdjson_warn_unused simdjson_result extract_from(const T &obj, size_t initial_capacity = lasx::builder::string_builder::DEFAULT_INITIAL_CAPACITY) { + lasx::builder::string_builder b(initial_capacity); + lasx::builder::extract_from(b, obj); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); } -simdjson_inline simdjson_result value_iterator::get_number() noexcept { - number num; - error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); - if(error) { return error; } - return num; + +} // namespace simdjson + +#endif // SIMDJSON_STATIC_REFLECTION + +#endif +/* end file simdjson/generic/ondemand/json_builder.h for lasx */ + +// JSON path accessor (compile-time) - must be after inline definitions +/* including simdjson/generic/ondemand/compile_time_accessors.h for lasx: #include "simdjson/generic/ondemand/compile_time_accessors.h" */ +/* begin file simdjson/generic/ondemand/compile_time_accessors.h for lasx */ +/** + * Compile-time JSON Path and JSON Pointer accessors using C++26 reflection (P2996) + * + * This file validates JSON paths/pointers against struct definitions at compile time + * and generates optimized accessor code with zero runtime overhead. + * + * ## How It Works + * + * **Compile Time**: Path is parsed, validated against struct, types are checked + * **Runtime**: Direct navigation with no parsing or validation overhead + * + * Example: + * ```cpp + * struct User { std::string name; std::vector emails; }; + * + * std::string email; + * path_accessor::extract_field(doc, email); + * + * // Compile time validates: + * // 1. User has "emails" field + * // 2. "emails" is array-like + * // 3. Element type is std::string + * // 4. static_assert(^^std::string == ^^std::string) + * + * // Runtime just navigates: + * // doc.get_object().find_field("emails").get_array().at(0).get(email) + * ``` + * + * ## Key Reflection APIs + * + * - `^^Type`: Reflect operator, converts type to std::meta::info + * - `std::meta::nonstatic_data_members_of(type)`: Get all fields of a struct + * - `std::meta::identifier_of(member)`: Get field name as string_view + * - `std::meta::type_of(member)`: Get reflected type of a field + * - `std::meta::is_array_type(type)`: Check if C-style array + * - `std::meta::remove_extent(array)`: Extract element type from array + * - `std::meta::members_of(type)`: Get all members including typedefs + * - `std::meta::is_type(member)`: Check if member is a type (vs field) + * + * All operations execute at compile time in consteval contexts. + */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_COMPILE_TIME_ACCESSORS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_COMPILE_TIME_ACCESSORS_H */ +/* amalgamation skipped (editor-only): */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// Arguably, we should just check SIMDJSON_STATIC_REFLECTION since it +// is unlikely that we will have reflection support without concepts support. +#if SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION + +#include +#include +#include + +namespace simdjson { +namespace lasx { +namespace ondemand { +/*** + * JSONPath implementation for compile-time access + * RFC 9535 JSONPath: Query Expressions for JSON, https://www.rfc-editor.org/rfc/rfc9535 + */ +namespace json_path { + +// Note: value type must be fully defined before this header is included +// This is ensured by including this in amalgamated.h after value-inl.h + +using ::simdjson::lasx::ondemand::value; + +// Path step types +enum class step_type { + field, // .field_name or ["field_name"] + array_index // [index] +}; + +// Represents a single step in a JSON path +template +struct path_step { + step_type type; + char key[N]; // Field name (empty for array indices) + std::size_t index; // Array index (0 for field access) + + constexpr path_step(step_type t, const char (&k)[N], std::size_t idx = 0) + : type(t), index(idx) { + for (std::size_t i = 0; i < N; ++i) { + key[i] = k[i]; + } + } + + constexpr std::string_view key_view() const { + return {key, N - 1}; + } +}; + +// Helper to create field step +template +consteval auto make_field_step(const char (&name)[N]) { + return path_step(step_type::field, name, 0); } -simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("is_root_integer"); - uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - return false; // if there are more than 20 characters, it cannot be represented as an integer. +// Helper to create array index step +consteval auto make_index_step(std::size_t idx) { + return path_step<1>(step_type::array_index, "", idx); +} + +// Parse state for compile-time JSON path parsing +struct parse_result { + bool success; + std::size_t pos; + std::string_view error_msg; +}; + +// Compile-time JSON path parser +// Supports subset: .field, ["field"], [index], nested combinations +template +struct json_path_parser { + static constexpr std::string_view path_str = Path.view(); + + // Skip leading $ if present + static consteval std::size_t skip_root() { + if (!path_str.empty() && path_str[0] == '$') { + return 1; + } + return 0; + } + + // Count the number of steps in the path at compile time + static consteval std::size_t count_steps() { + std::size_t count = 0; + std::size_t i = skip_root(); + + while (i < path_str.size()) { + if (path_str[i] == '.') { + // Field access: .field + ++i; + if (i >= path_str.size()) break; + + // Skip field name + while (i < path_str.size() && path_str[i] != '.' && path_str[i] != '[') { + ++i; + } + ++count; + } else if (path_str[i] == '[') { + // Array or bracket notation + ++i; + if (i >= path_str.size()) break; + + if (path_str[i] == '"' || path_str[i] == '\'') { + // Field access: ["field"] or ['field'] + char quote = path_str[i]; + ++i; + while (i < path_str.size() && path_str[i] != quote) { + ++i; + } + if (i < path_str.size()) ++i; // skip closing quote + if (i < path_str.size() && path_str[i] == ']') ++i; + } else { + // Array index: [0], [123] + while (i < path_str.size() && path_str[i] != ']') { + ++i; + } + if (i < path_str.size()) ++i; // skip ] + } + ++count; + } else { + ++i; + } + } + + return count; + } + + // Parse a field name at compile time + static consteval std::size_t parse_field_name(std::size_t start, char* out, std::size_t max_len) { + std::size_t len = 0; + std::size_t i = start; + + while (i < path_str.size() && path_str[i] != '.' && path_str[i] != '[' && len < max_len - 1) { + out[len++] = path_str[i++]; + } + out[len] = '\0'; + return i; + } + + // Parse an array index at compile time + static consteval std::pair parse_array_index(std::size_t start) { + std::size_t index = 0; + std::size_t i = start; + + while (i < path_str.size() && path_str[i] >= '0' && path_str[i] <= '9') { + index = index * 10 + (path_str[i] - '0'); + ++i; + } + + return {i, index}; + } +}; + +// Compile-time path accessor generator +template +struct path_accessor { + using value = ::simdjson::lasx::ondemand::value; + + static constexpr auto parser = json_path_parser(); + static constexpr std::size_t num_steps = parser.count_steps(); + static constexpr std::string_view path_view = Path.view(); + + // Compile-time accessor generation + // If T is a struct, validates the path at compile time + // If T is void, skips validation + template + static inline simdjson_result access(DocOrValue& doc_or_val) noexcept { + // Validate path at compile time if T is a struct + if constexpr (std::is_class_v) { + constexpr bool path_valid = validate_path(); + static_assert(path_valid, "JSON path does not match struct definition"); + } + + // Parse the path at compile time to build access steps + return access_impl(doc_or_val.get_value()); + } + + // Extract value at path directly into target with compile-time type validation + // Example: std::string name; path_accessor::extract_field(doc, name); + template + static inline error_code extract_field(DocOrValue& doc_or_val, FieldType& target) noexcept { + static_assert(std::is_class_v, "extract_field requires T to be a struct type for validation"); + + // Validate path exists in struct definition + constexpr bool path_valid = validate_path(); + static_assert(path_valid, "JSON path does not match struct definition"); + + // Get the type at the end of the path + constexpr auto final_type = get_final_type(); + + // Verify target type matches the field type + static_assert(final_type == ^^FieldType, "Target type does not match the field type at the path"); + + // All validation done at compile time - just navigate and extract + auto json_value = access_impl(doc_or_val.get_value()); + if (json_value.error()) return json_value.error(); + + return json_value.get(target); + } + +private: + // Get the final type by walking the path through the struct type + template + static consteval std::enable_if_t, std::meta::info> get_final_type() { + auto current_type = ^^T; + std::size_t i = parser.skip_root(); + + while (i < path_view.size()) { + if (path_view[i] == '.') { + // .field syntax + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != '.' && path_view[i] != '[') { + ++i; + } + + std::string_view field_name = path_view.substr(field_start, i - field_start); + + auto members = std::meta::nonstatic_data_members_of( + current_type, std::meta::access_context::unchecked() + ); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + break; + } + } + + } else if (path_view[i] == '[') { + ++i; + if (i >= path_view.size()) break; + + if (path_view[i] == '"' || path_view[i] == '\'') { + // ["field"] syntax + char quote = path_view[i]; + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != quote) { + ++i; + } + + std::string_view field_name = path_view.substr(field_start, i - field_start); + if (i < path_view.size()) ++i; // skip quote + if (i < path_view.size() && path_view[i] == ']') ++i; + + auto members = std::meta::nonstatic_data_members_of( + current_type, std::meta::access_context::unchecked() + ); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + break; + } + } + + } else { + // [index] syntax - extract element type + while (i < path_view.size() && path_view[i] >= '0' && path_view[i] <= '9') { + ++i; + } + if (i < path_view.size() && path_view[i] == ']') ++i; + + current_type = get_element_type_reflected(current_type); + } + } else { + ++i; + } + } + + return current_type; + } + +private: + // Walk path and extract directly into final field using compile-time reflection + template + static inline error_code extract_with_reflection(simdjson_result current, TargetType& target_ref) noexcept { + if (current.error()) return current.error(); + + // Base case: end of path - extract into target + if constexpr (PathPos >= path_view.size()) { + return current.get(target_ref); + } + // Field access: .field_name + else if constexpr (path_view[PathPos] == '.') { + constexpr auto field_info = parse_next_field(PathPos); + constexpr std::string_view field_name = std::get<0>(field_info); + constexpr std::size_t next_pos = std::get<1>(field_info); + + constexpr auto member_info = find_member_by_name(CurrentType, field_name); + static_assert(member_info != ^^void, "Field not found in struct"); + + constexpr auto member_type = std::meta::type_of(member_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + auto obj = obj_result.value_unsafe(); + auto field_value = obj.find_field_unordered(field_name); + + if constexpr (next_pos >= path_view.size()) { + return field_value.get(target_ref); + } else { + return extract_with_reflection(field_value, target_ref); + } + } + // Bracket notation: [index] or ["field"] + else if constexpr (path_view[PathPos] == '[') { + constexpr auto bracket_info = parse_bracket(PathPos); + constexpr bool is_field = std::get<0>(bracket_info); + constexpr std::size_t next_pos = std::get<2>(bracket_info); + + if constexpr (is_field) { + constexpr std::string_view field_name = std::get<1>(bracket_info); + constexpr auto member_info = find_member_by_name(CurrentType, field_name); + static_assert(member_info != ^^void, "Field not found in struct"); + constexpr auto member_type = std::meta::type_of(member_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + auto obj = obj_result.value_unsafe(); + auto field_value = obj.find_field_unordered(field_name); + + if constexpr (next_pos >= path_view.size()) { + return field_value.get(target_ref); + } else { + return extract_with_reflection(field_value, target_ref); + } + } else { + constexpr std::size_t index = std::get<3>(bracket_info); + constexpr auto elem_type = get_element_type_reflected(CurrentType); + static_assert(elem_type != ^^void, "Could not determine array element type"); + + auto arr_result = current.get_array(); + if (arr_result.error()) return arr_result.error(); + auto arr = arr_result.value_unsafe(); + auto elem_value = arr.at(index); + + if constexpr (next_pos >= path_view.size()) { + return elem_value.get(target_ref); + } else { + return extract_with_reflection(elem_value, target_ref); + } + } + } + // Skip unexpected characters and continue + else { + return extract_with_reflection(current, target_ref); + } + } + + // Find member by name in reflected type + static consteval std::meta::info find_member_by_name(std::meta::info type_refl, std::string_view name) { + auto members = std::meta::nonstatic_data_members_of(type_refl, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::identifier_of(mem) == name) { + return mem; + } + } + } + + // Generate compile-time accessor code by walking the path + template + static inline simdjson_result access_impl(simdjson_result current) noexcept { + if (current.error()) return current; + + if constexpr (PathPos >= path_view.size()) { + return current; + } else if constexpr (path_view[PathPos] == '.') { + constexpr auto field_info = parse_next_field(PathPos); + constexpr std::string_view field_name = std::get<0>(field_info); + constexpr std::size_t next_pos = std::get<1>(field_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + + auto obj = obj_result.value_unsafe(); + auto next_value = obj.find_field_unordered(field_name); + + return access_impl(next_value); + + } else if constexpr (path_view[PathPos] == '[') { + constexpr auto bracket_info = parse_bracket(PathPos); + constexpr bool is_field = std::get<0>(bracket_info); + constexpr std::size_t next_pos = std::get<2>(bracket_info); + + if constexpr (is_field) { + constexpr std::string_view field_name = std::get<1>(bracket_info); + + auto obj_result = current.get_object(); + if (obj_result.error()) return obj_result.error(); + + auto obj = obj_result.value_unsafe(); + auto next_value = obj.find_field_unordered(field_name); + + return access_impl(next_value); + + } else { + constexpr std::size_t index = std::get<3>(bracket_info); + + auto arr_result = current.get_array(); + if (arr_result.error()) return arr_result.error(); + + auto arr = arr_result.value_unsafe(); + auto next_value = arr.at(index); + + return access_impl(next_value); + } + } else { + return access_impl(current); + } + } + + // Parse next field name + static consteval auto parse_next_field(std::size_t start) { + std::size_t i = start + 1; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != '.' && path_view[i] != '[') { + ++i; + } + std::string_view field_name = path_view.substr(field_start, i - field_start); + return std::make_tuple(field_name, i); + } + + // Parse bracket notation: returns (is_field, field_name, next_pos, index) + static consteval auto parse_bracket(std::size_t start) { + std::size_t i = start + 1; // skip '[' + + if (i < path_view.size() && (path_view[i] == '"' || path_view[i] == '\'')) { + // Field access + char quote = path_view[i]; + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != quote) { + ++i; + } + std::string_view field_name = path_view.substr(field_start, i - field_start); + if (i < path_view.size()) ++i; // skip closing quote + if (i < path_view.size() && path_view[i] == ']') ++i; + + return std::make_tuple(true, field_name, i, std::size_t(0)); + } else { + // Array index + std::size_t index = 0; + while (i < path_view.size() && path_view[i] >= '0' && path_view[i] <= '9') { + index = index * 10 + (path_view[i] - '0'); + ++i; + } + if (i < path_view.size() && path_view[i] == ']') ++i; + + return std::make_tuple(false, std::string_view{}, i, index); + } } - auto answer = numberparsing::is_integer(tmpbuf); - // If the parsing was a success, we must still check that it is - // a single scalar. Note that we parse first because of cases like '[]' where - // getting TRAILING_CONTENT is wrong. - if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } - return answer; -} -simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("number"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - uint8_t tmpbuf[1074+8+1+1]; - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - if(numberparsing::check_if_integer(json, max_len)) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); - return number_type::big_integer; +public: + // Check if reflected type is array-like (C-style array or indexable container) + // Uses reflection to test: 1) std::meta::is_array_type() for C arrays + // 2) std::meta::substitute() to test concepts::indexable_container concept + static consteval bool is_array_like_reflected(std::meta::info type_reflection) { + if (std::meta::is_array_type(type_reflection)) { + return true; } - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); - return NUMBER_ERROR; + + if (std::meta::can_substitute(^^concepts::indexable_container_v, {type_reflection})) { + return std::meta::extract(std::meta::substitute(^^concepts::indexable_container_v, {type_reflection})); + } + return false; } - auto answer = numberparsing::get_number_type(tmpbuf); - if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - return answer; -} -simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("number"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - // NOTE: the current approach doesn't work for very big integer numbers containing more than 1074 digits. - uint8_t tmpbuf[1074+8+1+1]; - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - if(numberparsing::check_if_integer(json, max_len)) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - logger::log_error(*_json_iter, start_position(), depth(), "Found big integer"); - return BIGINT_ERROR; + + // Extract element type from reflected array or container + // For C arrays: uses std::meta::remove_extent() + // For containers: finds value_type member using std::meta::members_of() + static consteval std::meta::info get_element_type_reflected(std::meta::info type_reflection) { + if (std::meta::is_array_type(type_reflection)) { + return std::meta::remove_extent(type_reflection); } - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters and not a big integer"); - return NUMBER_ERROR; + + auto members = std::meta::members_of(type_reflection, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::is_type(mem)) { + auto name = std::meta::identifier_of(mem); + if (name == "value_type") { + return mem; + } + } + } + return ^^void; } - number num; - error_code error = numberparsing::parse_number(tmpbuf, num); - if(error) { return error; } - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("number"); - return num; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { - return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); -} -template -simdjson_warn_unused simdjson_inline error_code value_iterator::get_root_string(string_type& receiver, bool check_trailing, bool allow_replacement) noexcept { - std::string_view content; - auto err = get_root_string(check_trailing, allow_replacement).get(content); - if (err) { return err; } - receiver = content; - return SUCCESS; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { - return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { - auto json = peek_scalar("string"); - if (*json != '"') { return incorrect_type_error("Not a string"); } - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_scalar("string"); - return raw_json_string(json+1); -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("uint64"); - uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; + +private: + // Check if type has member with given name + template + static consteval bool has_member(std::string_view member_name) { + constexpr auto members = std::meta::nonstatic_data_members_of(^^Type, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::identifier_of(mem) == member_name) { + return true; + } + } + return false; } - auto result = numberparsing::parse_unsigned(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("uint64"); + + // Get type of member by name + template + static consteval auto get_member_type(std::string_view member_name) { + constexpr auto members = std::meta::nonstatic_data_members_of(^^Type, std::meta::access_context::unchecked()); + for (auto mem : members) { + if (std::meta::identifier_of(mem) == member_name) { + return std::meta::type_of(mem); + } + } + return ^^void; } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("uint64"); - uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; + + // Check if non-reflected type is array-like + template + static consteval bool is_container_type() { + using BaseType = std::remove_cvref_t; + if constexpr (requires { typename BaseType::value_type; }) { + return true; + } + if constexpr (std::is_array_v) { + return true; + } + return false; } - auto result = numberparsing::parse_unsigned_in_string(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("uint64"); + + // Extract element type from non-reflected container + template + using extract_element_type = std::conditional_t< + requires { typename std::remove_cvref_t::value_type; }, + typename std::remove_cvref_t::value_type, + std::conditional_t< + std::is_array_v>, + std::remove_extent_t>, + void + > + >; + + // Validate path matches struct definition + static consteval bool validate_path() { + if constexpr (!std::is_class_v) { + return true; + } + + auto current_type = ^^T; + std::size_t i = parser.skip_root(); + + while (i < path_view.size()) { + if (path_view[i] == '.') { + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != '.' && path_view[i] != '[') { + ++i; + } + + std::string_view field_name = path_view.substr(field_start, i - field_start); + + bool found = false; + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + found = true; + break; + } + } + + if (!found) { + return false; + } + + } else if (path_view[i] == '[') { + ++i; + if (i >= path_view.size()) return false; + + if (path_view[i] == '"' || path_view[i] == '\'') { + char quote = path_view[i]; + ++i; + std::size_t field_start = i; + while (i < path_view.size() && path_view[i] != quote) { + ++i; + } + + std::string_view field_name = path_view.substr(field_start, i - field_start); + if (i < path_view.size()) ++i; + if (i < path_view.size() && path_view[i] == ']') ++i; + + bool found = false; + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == field_name) { + current_type = std::meta::type_of(mem); + found = true; + break; + } + } + + if (!found) { + return false; + } + + } else { + while (i < path_view.size() && path_view[i] >= '0' && path_view[i] <= '9') { + ++i; + } + + if (i < path_view.size() && path_view[i] == ']') ++i; + + if (!is_array_like_reflected(current_type)) { + return false; + } + + auto new_type = get_element_type_reflected(current_type); + + if (new_type == ^^void) { + return false; + } + + current_type = new_type; + } + } else { + ++i; + } + } + + return true; } - return result; +}; + +// Compile-time path accessor with validation +template +inline simdjson_result<::simdjson::lasx::ondemand::value> at_path_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = path_accessor; + return accessor::access(doc_or_val); } -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("int64"); - uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; + +// Overload without type parameter (no validation) +template +inline simdjson_result<::simdjson::lasx::ondemand::value> at_path_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = path_accessor; + return accessor::access(doc_or_val); +} + +// ============================================================================ +// JSON Pointer Compile-Time Support (RFC 6901) +// ============================================================================ + +// JSON Pointer parser: /field/0/nested (slash-separated) +template +struct json_pointer_parser { + static constexpr std::string_view pointer_str = Pointer.view(); + + // Unescape token: ~0 -> ~, ~1 -> / + static consteval void unescape_token(std::string_view src, char* dest, std::size_t& out_len) { + out_len = 0; + for (std::size_t i = 0; i < src.size(); ++i) { + if (src[i] == '~' && i + 1 < src.size()) { + if (src[i + 1] == '0') { + dest[out_len++] = '~'; + ++i; + } else if (src[i + 1] == '1') { + dest[out_len++] = '/'; + ++i; + } else { + dest[out_len++] = src[i]; + } + } else { + dest[out_len++] = src[i]; + } + } } - auto result = numberparsing::parse_integer(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("int64"); + // Check if token is numeric + static consteval bool is_numeric(std::string_view token) { + if (token.empty()) return false; + if (token[0] == '0' && token.size() > 1) return false; + for (char c : token) { + if (c < '0' || c > '9') return false; + } + return true; } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("int64"); - uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer - tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; + + // Parse numeric token to index + static consteval std::size_t parse_index(std::string_view token) { + std::size_t result = 0; + for (char c : token) { + result = result * 10 + (c - '0'); + } + return result; } - auto result = numberparsing::parse_integer_in_string(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("int64"); + // Count tokens in pointer + static consteval std::size_t count_tokens() { + if (pointer_str.empty() || pointer_str == "/") return 0; + + std::size_t count = 0; + std::size_t pos = pointer_str[0] == '/' ? 1 : 0; + + while (pos < pointer_str.size()) { + ++count; + std::size_t next_slash = pointer_str.find('/', pos); + if (next_slash == std::string_view::npos) break; + pos = next_slash + 1; + } + + return count; } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("double"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); - return NUMBER_ERROR; + + // Get Nth token + static consteval std::string_view get_token(std::size_t token_index) { + std::size_t pos = pointer_str[0] == '/' ? 1 : 0; + std::size_t current_token = 0; + + while (current_token < token_index) { + std::size_t next_slash = pointer_str.find('/', pos); + pos = next_slash + 1; + ++current_token; + } + + std::size_t token_end = pointer_str.find('/', pos); + if (token_end == std::string_view::npos) token_end = pointer_str.size(); + + return pointer_str.substr(pos, token_end - pos); } - auto result = numberparsing::parse_double(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("double"); +}; + +// JSON Pointer accessor +template +struct pointer_accessor { + using parser = json_pointer_parser; + static constexpr std::string_view pointer_view = Pointer.view(); + static constexpr std::size_t token_count = parser::count_tokens(); + + // Validate pointer against struct definition + static consteval bool validate_pointer() { + if constexpr (!std::is_class_v) { + return true; + } + + auto current_type = ^^T; + std::size_t pos = pointer_view[0] == '/' ? 1 : 0; + + while (pos < pointer_view.size()) { + // Extract token up to next / + std::size_t token_end = pointer_view.find('/', pos); + if (token_end == std::string_view::npos) token_end = pointer_view.size(); + + std::string_view token = pointer_view.substr(pos, token_end - pos); + + if (parser::is_numeric(token)) { + if (!path_accessor::is_array_like_reflected(current_type)) { + return false; + } + current_type = path_accessor::get_element_type_reflected(current_type); + } else { + bool found = false; + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); + + for (auto mem : members) { + if (std::meta::identifier_of(mem) == token) { + current_type = std::meta::type_of(mem); + found = true; + break; + } + } + + if (!found) return false; + } + + pos = token_end + 1; + } + + return true; } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("double"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. - tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); - return NUMBER_ERROR; + // Recursive accessor + template + static inline simdjson_result access_impl(simdjson_result current) noexcept { + if constexpr (TokenIndex >= token_count) { + return current; + } else { + constexpr std::string_view token = parser::get_token(TokenIndex); + + if constexpr (parser::is_numeric(token)) { + constexpr std::size_t index = parser::parse_index(token); + auto arr = current.get_array().value_unsafe(); + auto next_value = arr.at(index); + return access_impl(next_value); + } else { + auto obj = current.get_object().value_unsafe(); + auto next_value = obj.find_field_unordered(token); + return access_impl(next_value); + } + } } - auto result = numberparsing::parse_double_in_string(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("double"); + + // Access JSON value at pointer + template + static inline simdjson_result access(DocOrValue& doc_or_val) noexcept { + if constexpr (std::is_class_v) { + constexpr bool pointer_valid = validate_pointer(); + static_assert(pointer_valid, "JSON Pointer does not match struct definition"); + } + + if (pointer_view.empty() || pointer_view == "/") { + if constexpr (requires { doc_or_val.get_value(); }) { + return doc_or_val.get_value(); + } else { + return doc_or_val; + } + } + + simdjson_result current = doc_or_val.get_value(); + return access_impl<0>(current); } - return result; -} -simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("bool"); - // We have a boolean if we have either "true" or "false" and the next character is either - // a structural character or whitespace. We also check that the length is correct: - // "true" and "false" are 4 and 5 characters long, respectively. - bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && - (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); - bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && - (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); - if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("bool"); - return value_true; -} + // Extract value at pointer directly into target with type validation + template + static inline error_code extract_field(DocOrValue& doc_or_val, FieldType& target) noexcept { + static_assert(std::is_class_v, "extract_field requires T to be a struct type for validation"); -simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { - auto max_len = peek_root_length(); - auto json = peek_root_scalar("null"); - bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && - (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); - if(result) { // we have something that looks like a null. - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("null"); - } else if (json[0] == 'n') { - return incorrect_type_error("Not a null but starts with n"); + constexpr bool pointer_valid = validate_pointer(); + static_assert(pointer_valid, "JSON Pointer does not match struct definition"); + + constexpr auto final_type = get_final_type(); + static_assert(final_type == ^^FieldType, "Target type does not match the field type at the pointer"); + + simdjson_result current_value = doc_or_val.get_value(); + auto json_value = access_impl<0>(current_value); + if (json_value.error()) return json_value.error(); + + return json_value.get(target); } - return result; -} -simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); +private: + // Get final type by walking pointer through struct + template + static consteval std::enable_if_t, std::meta::info> get_final_type() { + auto current_type = ^^T; + std::size_t pos = pointer_view[0] == '/' ? 1 : 0; - return _json_iter->skip_child(depth()); -} + while (pos < pointer_view.size()) { + std::size_t token_end = pointer_view.find('/', pos); + if (token_end == std::string_view::npos) token_end = pointer_view.size(); -simdjson_inline value_iterator value_iterator::child() const noexcept { - assert_at_child(); - return { _json_iter, depth()+1, _json_iter->token.position() }; -} + std::string_view token = pointer_view.substr(pos, token_end - pos); -// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller -// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is -// marked non-inline. -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_inline bool value_iterator::is_open() const noexcept { - return _json_iter->depth() >= depth(); -} -SIMDJSON_POP_DISABLE_WARNINGS + if (parser::is_numeric(token)) { + current_type = path_accessor::get_element_type_reflected(current_type); + } else { + auto members = std::meta::nonstatic_data_members_of(current_type, std::meta::access_context::unchecked()); -simdjson_inline bool value_iterator::at_end() const noexcept { - return _json_iter->at_end(); -} + for (auto mem : members) { + if (std::meta::identifier_of(mem) == token) { + current_type = std::meta::type_of(mem); + break; + } + } + } -simdjson_inline bool value_iterator::at_start() const noexcept { - return _json_iter->token.position() == start_position(); -} + pos = token_end + 1; + } -simdjson_inline bool value_iterator::at_first_field() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - return _json_iter->token.position() == start_position() + 1; -} + return current_type; + } +}; -simdjson_inline void value_iterator::abandon() noexcept { - _json_iter->abandon(); +// Compile-time JSON Pointer accessor with validation +template +inline simdjson_result<::simdjson::lasx::ondemand::value> at_pointer_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = pointer_accessor; + return accessor::access(doc_or_val); } -simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { - return _depth; -} -simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { - return _json_iter->error; -} -simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { - return _json_iter->string_buf_loc(); -} -simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { - return *_json_iter; -} -simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { - return *_json_iter; +// Overload without type parameter (no validation) +template +inline simdjson_result<::simdjson::lasx::ondemand::value> at_pointer_compiled(DocOrValue& doc_or_val) noexcept { + using accessor = pointer_accessor; + return accessor::access(doc_or_val); } -simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { - return _json_iter->peek(start_position()); -} -simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { - return _json_iter->peek_length(start_position()); -} -simdjson_inline uint32_t value_iterator::peek_root_length() const noexcept { - return _json_iter->peek_root_length(start_position()); -} +} // namespace json_path +} // namespace ondemand +} // namespace lasx +} // namespace simdjson -simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - // If we're not at the position anymore, we don't want to advance the cursor. - if (!is_at_start()) { return peek_start(); } +#endif // SIMDJSON_SUPPORTS_CONCEPTS && SIMDJSON_STATIC_REFLECTION +#endif // SIMDJSON_GENERIC_ONDEMAND_COMPILE_TIME_ACCESSORS_H - // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. - assert_at_start(); - return _json_iter->peek(); -} +/* end file simdjson/generic/ondemand/compile_time_accessors.h for lasx */ -simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - // If we're not at the position anymore, we don't want to advance the cursor. - if (!is_at_start()) { return; } +/* end file simdjson/generic/ondemand/amalgamated.h for lasx */ +/* including simdjson/lasx/end.h: #include "simdjson/lasx/end.h" */ +/* begin file simdjson/lasx/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ - // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. - assert_at_start(); - _json_iter->return_current_and_advance(); - _json_iter->ascend_to(depth()-1); -} +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "lasx" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/lasx/end.h */ -simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { - logger::log_start_value(*_json_iter, start_position(), depth(), type); - // If we're not at the position anymore, we don't want to advance the cursor. - const uint8_t *json; - if (!is_at_start()) { -#if SIMDJSON_DEVELOPMENT_CHECKS - if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif // SIMDJSON_LASX_ONDEMAND_H +/* end file simdjson/lasx/ondemand.h */ +#else +#error Unknown SIMDJSON_BUILTIN_IMPLEMENTATION #endif - json = peek_start(); - if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } - } else { - assert_at_start(); - /** - * We should be prudent. Let us peek. If it is not the right type, we - * return an error. Only once we have determined that we have the right - * type are we allowed to advance! - */ - json = _json_iter->peek(); - if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } - _json_iter->return_current_and_advance(); + +/* undefining SIMDJSON_CONDITIONAL_INCLUDE */ +#undef SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { + /** + * @copydoc simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand + */ + namespace ondemand = SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand; +} // namespace simdjson + +#endif // SIMDJSON_BUILTIN_ONDEMAND_H +/* end file simdjson/builtin/ondemand.h */ + +namespace simdjson { + /** + * @copydoc simdjson::builtin::ondemand + */ + namespace ondemand = builtin::ondemand; + /** + * @copydoc simdjson::builtin::builder + */ + namespace builder = builtin::builder; + +#if SIMDJSON_STATIC_REFLECTION + /** + * Create a JSON string from any user-defined type using static reflection. + * Only available when SIMDJSON_STATIC_REFLECTION is enabled. + */ + template + requires(!std::same_as && + !std::same_as && + !std::same_as && + !std::same_as) + inline std::string to_json_string(const T& obj) { + builder::string_builder str_builder; + append(str_builder, obj); + std::string_view view; + if (str_builder.view().get(view) == SUCCESS) { + return std::string(view); + } + return ""; } +#endif + +} // namespace simdjson + +#endif // SIMDJSON_ONDEMAND_H +/* end file simdjson/ondemand.h */ +/* including simdjson/convert.h: #include "simdjson/convert.h" */ +/* begin file simdjson/convert.h */ + +#ifndef SIMDJSON_CONVERT_H +#define SIMDJSON_CONVERT_H + +/* skipped duplicate #include "simdjson/ondemand.h" */ +#include + +#if SIMDJSON_SUPPORTS_CONCEPTS + + +namespace simdjson { +namespace convert { +namespace internal { + +/** + * A utility class for automatically parsing JSON documents. + * This template is NOT part of our public API. + * It is subject to changes. + * @private + */ +template +struct auto_parser { +private: + parser_type m_parser; + ondemand::document m_doc; + error_code m_error{SUCCESS}; + + template + static constexpr bool is_nothrow_gettable = requires(ondemand::document doc) { + { doc.get() } noexcept; + }; +public: + explicit auto_parser(parser_type &&parser, ondemand::document &&doc) noexcept requires(!std::is_pointer_v); + explicit auto_parser(parser_type &&parser, padded_string_view const str) noexcept requires(!std::is_pointer_v); + explicit auto_parser(std::remove_pointer_t &parser, ondemand::document &&doc) noexcept requires(std::is_pointer_v); + explicit auto_parser(std::remove_pointer_t &parser, padded_string_view const str) noexcept requires(std::is_pointer_v); + explicit auto_parser(padded_string_view const str) noexcept requires(std::is_pointer_v); + explicit auto_parser(parser_type parser, ondemand::document &&doc) noexcept requires(std::is_pointer_v); + auto_parser(auto_parser const &) = delete; + auto_parser &operator=(auto_parser const &) = delete; + auto_parser(auto_parser &&) noexcept = default; + auto_parser &operator=(auto_parser &&) noexcept = default; + ~auto_parser() = default; + + simdjson_warn_unused std::remove_pointer_t &parser() noexcept; + + template + simdjson_warn_unused simdjson_inline simdjson_result result() noexcept(is_nothrow_gettable); + template + simdjson_warn_unused simdjson_inline error_code get(T &value) && noexcept(is_nothrow_gettable); + + + simdjson_warn_unused simdjson_inline simdjson_result array() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result object() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result number() noexcept; + + +#if SIMDJSON_EXCEPTIONS + template + simdjson_warn_unused simdjson_inline explicit(false) operator T() noexcept(false); +#endif // SIMDJSON_EXCEPTIONS + + template + simdjson_warn_unused simdjson_inline std::optional optional() noexcept(is_nothrow_gettable); +}; + + +/** + * A utility class for adapting values for the `auto_parser`. + * This template is not part of our public API. It is subject to changes. + * @private + */ +template +struct to_adaptor { + T operator()(simdjson_result &val) const noexcept; + auto operator()(padded_string_view const str) const noexcept; + auto operator()(ondemand::parser &parser, padded_string_view const str) const noexcept; + // The std::string is padded with reserve to ensure there is enough space for padding. + // Some sanitizers may not like this, so you can use simdjson::pad instead. + // simdjson::from(simdjson::pad(str)) + auto operator()(std::string str) const noexcept; + auto operator()(ondemand::parser &parser, std::string str) const noexcept; +}; +// deduction guide +auto_parser(padded_string_view const str) -> auto_parser; +} // namespace internal +} // namespace convert + +/** + * The simdjson::from instance is EXPERIMENTAL AND SUBJECT TO CHANGES. + * + * The `from` instance is a utility adaptor for parsing JSON strings into objects. + * It provides a convenient way to convert JSON data into C++ objects using the `auto_parser`. + * + * Example usage: + * + * ```cpp + * std::map obj = + * simdjson::from(R"({"key": "value"})"_padded); + * ``` + * + * This will parse the JSON string and return an object representation. By default, we + * use the simdjson::ondemand::parser::get_parser() instance. A parser instance should + * be used for just one document at a time. + * + * You can also pass you own parser instance: + * ```cpp + * simdjson::ondemand::parser parser; + * std::map obj = + * simdjson::from(parser, R"({"key": "value"})"_padded); + * ``` + * The parser instance can be reused. + * + * This functionality requires C++20 or better. + */ +static constexpr convert::internal::to_adaptor<> from{}; + +} // namespace simdjson + +#endif // SIMDJSON_SUPPORTS_CONCEPTS +#endif // SIMDJSON_CONVERT_H +/* end file simdjson/convert.h */ +/* including simdjson/convert-inl.h: #include "simdjson/convert-inl.h" */ +/* begin file simdjson/convert-inl.h */ + +#ifndef SIMDJSON_CONVERT_INL_H +#define SIMDJSON_CONVERT_INL_H + +/* skipped duplicate #include "simdjson/convert.h" */ +#if SIMDJSON_SUPPORTS_CONCEPTS +namespace simdjson { +namespace convert { +namespace internal { +// auto_parser method definitions +template +inline auto_parser::auto_parser(parser_type &&parser, ondemand::document &&doc) noexcept requires(!std::is_pointer_v) + : m_parser{std::move(parser)}, m_doc{std::move(doc)} {} + +template +inline auto_parser::auto_parser(parser_type &&parser, padded_string_view const str) noexcept requires(!std::is_pointer_v) + : m_parser{std::move(parser)}, m_doc{}, m_error{SUCCESS} { + m_error = m_parser.iterate(str).get(m_doc); +} +template +inline auto_parser::auto_parser(std::remove_pointer_t &parser, ondemand::document &&doc) noexcept requires(std::is_pointer_v) + : m_parser{&parser}, m_doc{std::move(doc)} {} - return SUCCESS; +template +inline auto_parser::auto_parser(std::remove_pointer_t &parser, padded_string_view const str) noexcept requires(std::is_pointer_v) + : m_parser{&parser}, m_doc{}, m_error{SUCCESS} { + m_error = m_parser->iterate(str).get(m_doc); } +template +inline auto_parser::auto_parser(padded_string_view const str) noexcept requires(std::is_pointer_v) + : auto_parser{ondemand::parser::get_parser(), str} {} -simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return peek_start(); } +template +inline auto_parser::auto_parser(parser_type parser, ondemand::document &&doc) noexcept requires(std::is_pointer_v) + : auto_parser{*parser, std::move(doc)} {} - assert_at_root(); - return _json_iter->peek(); -} -simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return peek_start(); } - assert_at_non_root_start(); - return _json_iter->peek(); -} -simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return; } - assert_at_root(); - _json_iter->return_current_and_advance(); - _json_iter->ascend_to(depth()-1); + +template +inline std::remove_pointer_t &auto_parser::parser() noexcept { + if constexpr (std::is_pointer_v) { + return *m_parser; + } else { + return m_parser; + } } -simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return; } - assert_at_non_root_start(); - _json_iter->return_current_and_advance(); - _json_iter->ascend_to(depth()-1); +template +template +inline simdjson_result auto_parser::result() noexcept(is_nothrow_gettable) { + if (m_error != SUCCESS) { + return m_error; + } + return m_doc.get(); } -simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { - logger::log_error(*_json_iter, start_position(), depth(), message); - return INCORRECT_TYPE; +template +template +simdjson_warn_unused simdjson_inline error_code auto_parser::get(T &value) && noexcept(is_nothrow_gettable) { + return result().get(value); } -simdjson_inline bool value_iterator::is_at_start() const noexcept { - return position() == start_position(); +template +inline simdjson_result auto_parser::array() noexcept { + return result(); } -simdjson_inline bool value_iterator::is_at_key() const noexcept { - // Keys are at the same depth as the object. - // Note here that we could be safer and check that we are within an object, - // but we do not. - return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; +template +inline simdjson_result auto_parser::object() noexcept { + return result(); } -simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { - // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). - auto delta = position() - start_position(); - return delta == 1 || delta == 2; +template +inline simdjson_result auto_parser::number() noexcept { + return result(); } -inline void value_iterator::assert_at_start() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); - SIMDJSON_ASSUME( _depth > 0 ); +#if SIMDJSON_EXCEPTIONS +template +template +inline auto_parser::operator T() noexcept(false) { + if (m_error != SUCCESS) { + throw simdjson_error(m_error); + } + return m_doc.get(); } +#endif // SIMDJSON_EXCEPTIONS -inline void value_iterator::assert_at_container_start() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); - SIMDJSON_ASSUME( _depth > 0 ); +template +template +inline std::optional auto_parser::optional() noexcept(is_nothrow_gettable) { + if (m_error != SUCCESS) { + return std::nullopt; + } + T value; + if (m_doc.get().get(value)) [[unlikely]] { + return std::nullopt; + } + return {std::move(value)}; } -inline void value_iterator::assert_at_next() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); - SIMDJSON_ASSUME( _depth > 0 ); +// to_adaptor method definitions +template +inline T to_adaptor::operator()(simdjson_result &val) const noexcept { + return val.get(); } -simdjson_inline void value_iterator::move_at_start() noexcept { - _json_iter->_depth = _depth; - _json_iter->token.set_position(_start_position); +template +inline auto to_adaptor::operator()(padded_string_view const str) const noexcept { + return auto_parser{str}; } -simdjson_inline void value_iterator::move_at_container_start() noexcept { - _json_iter->_depth = _depth; - _json_iter->token.set_position(_start_position + 1); +template +inline auto to_adaptor::operator()(ondemand::parser &parser, padded_string_view const str) const noexcept { + return auto_parser{parser, str}; } -simdjson_inline simdjson_result value_iterator::reset_array() noexcept { - if(error()) { return error(); } - move_at_container_start(); - return started_array(); +template +inline auto to_adaptor::operator()(std::string str) const noexcept { + return auto_parser{pad_with_reserve(str)}; } -simdjson_inline simdjson_result value_iterator::reset_object() noexcept { - if(error()) { return error(); } - move_at_container_start(); - return started_object(); +template +inline auto to_adaptor::operator()(ondemand::parser &parser, std::string str) const noexcept { + return auto_parser{parser, pad_with_reserve(str)}; } +} // namespace internal +} // namespace convert +} // namespace simdjson +#endif // SIMDJSON_SUPPORTS_CONCEPTS +#endif // SIMDJSON_CONVERT_INL_H +/* end file simdjson/convert-inl.h */ -inline void value_iterator::assert_at_child() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); - SIMDJSON_ASSUME( _depth > 0 ); +// Compile-time JSON parsing (C++26 P2996 reflection) +/* including simdjson/compile_time_json.h: #include "simdjson/compile_time_json.h" */ +/* begin file simdjson/compile_time_json.h */ +/** + * @file compile_time_json.h + * @brief Compile-time JSON parsing using C++26 reflection with + * std::meta::substitute() + */ + +#ifndef SIMDJSON_GENERIC_COMPILE_TIME_JSON_H +#define SIMDJSON_GENERIC_COMPILE_TIME_JSON_H + +#if SIMDJSON_STATIC_REFLECTION + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace simdjson { +namespace compile_time { + +/** + * @brief Compile-time JSON parser. This function parses the provided JSON + * string at compile time and returns a custom struct type representing the JSON + * object. + * + * We have a few limitations which trigger compile-time errors if violated: + * - Only JSON objects and arrays are supported at the top level (no primitives). + * We will lift this limitation in the future. + * - Strings are represented using the const char * in UTF-8, but they must not + * contain embedded nulls. We would prefer to represent them as std::string or + * std::string_view, and hope to do so in the future. + * - Heterogeneous arrays are not supported yet. E.g., you need to have arrays of + * all integers, or all strings, all floats, all compatible objects, etc. + * For example, the following is accepted: + * [ + * { "name": "Alice", "age": 30 }, + * { "name": "Bob", "age": 25 }, + * { "name": "Charlie", "age": 35 } + * ] + * but the following is not: + * [ + * { "name": "Alice", "age": 30 }, + * "Just a string", + * 42, + * { "name": "Charlie", "age": 35 } + * ] + * + * We may support heterogeneous arrays in the future with std::variant types. + * - We parse the first JSON document encountered in the string. Trailing + * characters are ignored. Thus if your JSON begins with {"a":1}, everything + * after the closing } is ignored. This limitation will be lifted in the future, + * reporting an error. + * + * These limitations are safe in the sense that they result in compile-time errors. + * Thus you will not get truncated strings or imprecise floats silently. + * + * This function is subject to change in the future. + */ +template consteval auto parse_json(); + +} // namespace compile_time +} // namespace simdjson + + +template +consteval auto operator ""_json() { + return simdjson::compile_time::parse_json(); } -inline void value_iterator::assert_at_root() const noexcept { - assert_at_start(); - SIMDJSON_ASSUME( _depth == 1 ); +#endif // SIMDJSON_STATIC_REFLECTION +#endif // SIMDJSON_GENERIC_COMPILE_TIME_JSON_H +/* end file simdjson/compile_time_json.h */ +/* including simdjson/compile_time_json-inl.h: #include "simdjson/compile_time_json-inl.h" */ +/* begin file simdjson/compile_time_json-inl.h */ +/** + * @file compile_time_json-inl.h + * @brief Implementation details for compile-time JSON parsing + * + * This file contains inline implementations and helper utilities for + * compile-time JSON parsing. Currently, the main implementation is + * self-contained in the header. + */ + +#ifndef SIMDJSON_GENERIC_COMPILE_TIME_JSON_INL_H + +#if SIMDJSON_STATIC_REFLECTION + +/* skipped duplicate #include "simdjson/compile_time_json.h" */ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define simdjson_consteval_error(...) \ + { \ + std::abort(); \ + } + +namespace simdjson { +namespace compile_time { + +/** + * Namespace for number parsing utilities. + * We seek to provide exact compile-time number parsing functions. + * That is not trivial, but thankfully we can reuse much of the existing + * simdjson functionality. + * Importantly, it is not a trivial matter to provide correct rounding + * for floating-point numbers at compile-time. The fast_float library + * does it well. + */ +namespace number_parsing { + +// Counts the number of leading zeros in a 64-bit integer. +consteval int leading_zeroes(uint64_t input_num, int last_bit = 0) { + if (input_num & uint64_t(0xffffffff00000000)) { + input_num >>= 32; + last_bit |= 32; + } + if (input_num & uint64_t(0xffff0000)) { + input_num >>= 16; + last_bit |= 16; + } + if (input_num & uint64_t(0xff00)) { + input_num >>= 8; + last_bit |= 8; + } + if (input_num & uint64_t(0xf0)) { + input_num >>= 4; + last_bit |= 4; + } + if (input_num & uint64_t(0xc)) { + input_num >>= 2; + last_bit |= 2; + } + if (input_num & uint64_t(0x2)) { /* input_num >>= 1; */ + last_bit |= 1; + } + return 63 - last_bit; +} +// Multiplies two 32-bit unsigned integers and returns a 64-bit result. +consteval uint64_t emulu(uint32_t x, uint32_t y) { return x * (uint64_t)y; } +consteval uint64_t umul128_generic(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = (uint64_t)(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + (uint64_t)(lo < bd); + return lo; } -inline void value_iterator::assert_at_non_root_start() const noexcept { - assert_at_start(); - SIMDJSON_ASSUME( _depth > 1 ); +// Represents a 128-bit unsigned integer as two 64-bit parts. +// We have a value128 struct elsewhere in the simdjson, but we +// use a separate one here for clarity. +struct value128 { + uint64_t low; + uint64_t high; + + constexpr value128(uint64_t _low, uint64_t _high) : low(_low), high(_high) {} + constexpr value128() : low(0), high(0) {} +}; + +// Multiplies two 64-bit integers and returns a 128-bit result as value128. +consteval value128 full_multiplication(uint64_t a, uint64_t b) { + value128 answer; + answer.low = umul128_generic(a, b, &answer.high); + return answer; } -inline void value_iterator::assert_is_valid() const noexcept { - SIMDJSON_ASSUME( _json_iter != nullptr ); +// Converts mantissa and exponent to a double, considering the sign. +consteval double to_double(uint64_t mantissa, int64_t exponent, bool negative) { + uint64_t sign_bit = negative ? (1ULL << 63) : 0; + uint64_t exponent_bits = (uint64_t(exponent) & 0x7FF) << 52; + uint64_t bits = sign_bit | exponent_bits | (mantissa & ((1ULL << 52) - 1)); + return std::bit_cast(bits); } -simdjson_inline bool value_iterator::is_valid() const noexcept { - return _json_iter != nullptr; +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// Returns true on success, false on failure. +// Failure suggests and invalid input or out-of-range result. +consteval bool compute_float_64(int64_t power, uint64_t i, bool negative, + double &d) { + if (i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + int lz = leading_zeroes(i); + i <<= lz; + const uint32_t index = + 2 * uint32_t(power - simdjson::internal::smallest_power); + value128 firstproduct = full_multiplication( + i, simdjson::internal::powers_template<>::power_of_five_128[index]); + if ((firstproduct.high & 0x1FF) == 0x1FF) { + value128 secondproduct = full_multiplication( + i, simdjson::internal::powers_template<>::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if (secondproduct.high > firstproduct.low) { + firstproduct.high++; + } + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + int64_t real_exponent = exponent - lz; + if (real_exponent <= 0) { + if (-real_exponent + 1 >= 64) { + d = negative ? -0.0 : 0.0; + return true; + } + mantissa >>= -real_exponent + 1; + mantissa += (mantissa & 1); + mantissa >>= 1; + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + if ((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1)) { + if ((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; + } + } + mantissa += mantissa & 1; + mantissa >>= 1; + if (mantissa >= (1ULL << 53)) { + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + if (real_exponent > 2046) { + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; } -simdjson_inline simdjson_result value_iterator::type() const noexcept { - switch (*peek_start()) { - case '{': - return json_type::object; - case '[': - return json_type::array; - case '"': - return json_type::string; - case 'n': - return json_type::null; - case 't': case 'f': - return json_type::boolean; - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return json_type::number; - default: - return TAPE_ERROR; +// Parses a single digit character and updates the integer value. +consteval bool parse_digit(const char c, uint64_t &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; } + i = 10 * i + digit; + return true; } -simdjson_inline token_position value_iterator::start_position() const noexcept { - return _start_position; +// Parses a JSON float from a string starting at src. +// Returns the parsed double and the number of characters consumed. +consteval std::pair parse_double(const char *src, + const char *end) { + auto get_value = [&](const char *pointer) -> char { + if (pointer == end) { + return '\0'; + } + return *pointer; + }; + const char *srcinit = src; + bool negative = (get_value(src) == '-'); + src += uint8_t(negative); + uint64_t i = 0; + const char *p = src; + p += parse_digit(get_value(p), i); + bool leading_zero = (i == 0); + while (parse_digit(get_value(p), i)) { + p++; + } + if (p == src) { + simdjson_consteval_error("Invalid float value"); + } + if ((leading_zero && p != src + 1)) { + simdjson_consteval_error("Invalid float value"); + } + int64_t exponent = 0; + bool overflow; + if (get_value(p) == '.') { + p++; + const char *start_decimal_digits = p; + if (!parse_digit(get_value(p), i)) { + simdjson_consteval_error("Invalid float value"); + } // no decimal digits + p++; + while (parse_digit(get_value(p), i)) { + p++; + } + exponent = -(p - start_decimal_digits); + overflow = p - src - 1 > 19; + if (overflow && leading_zero) { + const char *start_digits = src + 2; + while (get_value(start_digits) == '0') { + start_digits++; + } + overflow = p - start_digits > 19; + } + } else { + overflow = p - src > 19; + } + if (overflow) { + simdjson_consteval_error( + "Overflow while computing the float value: too many digits"); + } + if (get_value(p) == 'e' || get_value(p) == 'E') { + p++; + bool exp_neg = get_value(p) == '-'; + p += exp_neg || get_value(p) == '+'; + uint64_t exp = 0; + const char *start_exp_digits = p; + while (parse_digit(get_value(p), exp)) { + p++; + } + if (p - start_exp_digits == 0 || p - start_exp_digits > 19) { + simdjson_consteval_error("Invalid float value"); + } + exponent += exp_neg ? 0 - exp : exp; + } + + overflow = overflow || exponent < simdjson::internal::smallest_power || + exponent > simdjson::internal::largest_power; + if (overflow) { + simdjson_consteval_error("Overflow while computing the float value"); + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + simdjson_consteval_error("Overflow while computing the float value"); + } + return {d, size_t(p - srcinit)}; } +} // namespace number_parsing -simdjson_inline token_position value_iterator::position() const noexcept { - return _json_iter->position(); +// JSON string may contain embedded nulls, and C++26 reflection does not yet +// support std::string_view as a data member type. As a workaround, we define +// a custom type that holds a const char* and a size. +template struct fixed_json_string { + // Statically-allocated array to hold the characters and a null terminator + static constexpr char inner_data[] = {Vals..., '\0'}; + + // Constant for the length of the string view (excluding the null terminator) + static constexpr std::size_t inner_size = sizeof...(Vals); + + // The std::string_view over the data. + // We use data and size to avoid including the null terminator in the view. + static constexpr std::string_view view = {inner_data, inner_size}; + + constexpr operator std::string_view() { return view; } + constexpr const char *c_str() { return view.data(); } + constexpr const char *data() { return view.data(); } + constexpr size_t size() { return view.size(); } +}; + +consteval std::meta::info to_fixed_json_string(std::string_view in) { + std::vector Args = {}; + for (char c : in) { + Args.push_back(std::meta::reflect_constant(c)); + } + return std::meta::substitute(^^fixed_json_string, Args); } -simdjson_inline token_position value_iterator::end_position() const noexcept { - return _json_iter->end_position(); +/** + * @brief Helper struct for substitute() pattern + */ +template struct type_builder { + struct constructed_type; + consteval { + std::meta::define_aggregate(^^constructed_type, { + meta_info...}); + } +}; + +/** + * @brief Type alias for the generated struct + * Usage: + * using struct = class_type; + */ +template +using class_type = type_builder::constructed_type; + +/** + */ + +/** + * @brief Variable template for constructing instances with values + */ +template constexpr auto construct_from = T{Vs...}; + +// in JSON, there are only a few whitespace characters that are allowed +// outside of objects, arrays, strings, and numbers. +[[nodiscard]] constexpr bool is_whitespace(char c) { + return c == ' ' || c == '\n' || c == '\t' || c == '\r'; +}; + +[[nodiscard]] constexpr std::string_view trim_whitespace(std::string_view str) { + size_t start = 0; + size_t end = str.size(); + + while (start < end && is_whitespace(str[start])) { + start++; + } + while (end > start && is_whitespace(str[end - 1])) { + end--; + } + return str.substr(start, end - start); } -simdjson_inline token_position value_iterator::last_position() const noexcept { - return _json_iter->last_position(); +// Forward declaration +consteval std::pair +parse_json_object_impl(std::string_view json); + +/////////////////////////////////////////////////// +/// NUMBER PARSING +/////////////////////////////////////////////////// + +// Parses a JSON number from a string view. +// Returns the number of characters consumed and the parsed value +// as a variant of int64_t, uint64_t, or double. +[[nodiscard]] consteval size_t +parse_number(std::string_view json, + std::variant &out) { + if (json.empty()) { + simdjson_consteval_error("Empty string is not a valid JSON number"); + } + if (json[0] == '+') { + simdjson_consteval_error("Invalid number: leading '+' sign is not allowed"); + } + // Compute the range and determine the type. + auto it = json.begin(); + if (it != json.end() && (*it == '-')) { + it++; + } + while (it != json.end() && (*it >= '0' && *it <= '9')) { + it++; + } + bool is_float = false; + if (it != json.end() && (*it == '.' || *it == 'e' || *it == 'E')) { + is_float = true; + if (*it == '.') { + it++; + while (it != json.end() && (*it >= '0' && *it <= '9')) { + it++; + } + } + if (it != json.end() && (*it == 'e' || *it == 'E')) { + it++; + if (it != json.end() && (*it == '+' || *it == '-')) { + it++; + } + while (it != json.end() && (*it >= '0' && *it <= '9')) { + it++; + } + } + } + size_t scope = it - json.begin(); + + bool is_negative = json[0] == '-'; + // Note that we consider -0 to be an integer unless it has a decimal point or + // exponent. + if (is_float) { + // It would be cool to use std::from_chars in a consteval context, but it is + // not supported yet for floating point types. :-( + auto [value, offset] = + number_parsing::parse_double(json.data(), json.data() + json.size()); + if (offset != scope) { + simdjson_consteval_error( + "Internal error: cannot agree on the character range of the float"); + } + out = value; + return offset; + } else if (is_negative) { + int64_t int_value = 0; + std::from_chars_result res = + std::from_chars(json.data(), json.data() + json.size(), int_value); + if (res.ec == std::errc()) { + out = int_value; + if ((res.ptr - json.data()) != scope) { + simdjson_consteval_error( + "Internal error: cannot agree on the character range of the float"); + } + return (res.ptr - json.data()); + } else { + simdjson_consteval_error("Invalid integer value"); + } + } else { + uint64_t uint_value = 0; + std::from_chars_result res = + std::from_chars(json.data(), json.data() + json.size(), uint_value); + if (res.ec == std::errc()) { + out = uint_value; + if ((res.ptr - json.data()) != scope) { + simdjson_consteval_error( + "Internal error: cannot agree on the character range of the float"); + } + return (res.ptr - json.data()); + } else { + simdjson_consteval_error("Invalid unsigned integer value"); + } + } } -simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { - return _json_iter->report_error(error, message); +//////////////////////////////////////////////////////// +/// STRING PARSING +//////////////////////////////////////////////////////// + +// parse a JSON string value, handling escape sequences and validating UTF-8 +// Returns the created string and the number of characters consumed. +// Note that the number of characters consumed includes the surrounding quotes. +// The number of bytes written to out differs from the number of characters +// consumed in general because of escape sequences and UTF-8 encoding. +[[nodiscard]] consteval std::pair +parse_string(std::string_view json) { + auto cursor = json.begin(); + auto end = json.end(); + std::string out; + + // Expect starting quote + if (cursor == end || *(cursor++) != '"') { + simdjson_consteval_error("Expected opening quote for string"); + } + // Notice that the quote is not appended. + + // Process \uXXXX escape sequences, writes out to out. + // Returns true on success, false on error. + auto process_escaped_unicode = [&] [[nodiscard]] () -> bool { + // Processing \uXXXX escape sequence is a bit complicated, so we + // define helper lambdas here. + // + // consume the XXXX in \uXXXX and return the corresponding code point. + // In case of error, a value greater than 0xFFFF is returned. + // The caller should check! + auto hex_to_u32 = [&] [[nodiscard]] () -> uint32_t { + auto digit = [](uint8_t c) -> uint32_t { + if (c >= '0' && c <= '9') + return c - '0'; + if (c >= 'A' && c <= 'F') + return c - 'A' + 10; + if (c >= 'a' && c <= 'f') + return c - 'a' + 10; + return 0xFFFFFFFF; + }; + if (end - cursor < 4) { + return 0xFFFFFFFF; + } + uint32_t d0 = digit(*cursor++); + uint32_t d1 = digit(*cursor++); + uint32_t d2 = digit(*cursor++); + uint32_t d3 = digit(*cursor++); + + return (d0 << 12) | (d1 << 8) | (d2 << 4) | d3; + }; + // Write code point as UTF-8 into out. + // The caller must ensure that the code point is valid, + // i.e., not in the surrogate range or greater than 0x10FFFF. + auto codepoint_to_utf8 = [&out] [[nodiscard]] (uint32_t cp) -> bool { + // the high and low surrogates U+D800 through U+DFFF are invalid code + // points + if (cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF)) { + return false; + } + if (cp <= 0x7F) { + out.push_back(uint8_t(cp)); + } else if (cp <= 0x7FF) { + out.push_back(uint8_t((cp >> 6) + 192)); + out.push_back(uint8_t((cp & 63) + 128)); + } else if (cp <= 0xFFFF) { + out.push_back(uint8_t((cp >> 12) + 224)); + out.push_back(uint8_t(((cp >> 6) & 63) + 128)); + out.push_back(uint8_t((cp & 63) + 128)); + } else if (cp <= 0x10FFFF) { + out.push_back(uint8_t((cp >> 18) + 240)); + out.push_back(uint8_t(((cp >> 12) & 63) + 128)); + out.push_back(uint8_t(((cp >> 6) & 63) + 128)); + out.push_back(uint8_t((cp & 63) + 128)); + } else { + return false; + } + return true; + }; + // We begin the implementation of process_escaped_unicode here. + // The substitution_code_point is used for invalid sequences. + // (This is U+FFFD, the Unicode replacement character.) + constexpr uint32_t substitution_code_point = 0xfffd; + // We need at least 4 characters for the hex digits + if (end - cursor < 4) { + return false; + } + // Consume the 4 hex digits + uint32_t code_point = hex_to_u32(); + // Check for validity + if (code_point > 0xFFFF) { + return false; + } + // If we have a high surrogate, we need to process a low surrogate + if (code_point >= 0xd800 && code_point < 0xdc00) { + // We need at least 6 more characters for \uXXXX, if they are NOT + // present, we have an error (isolated high surrogate), which we + // tolerate by substituting the substitution_code_point. + if (end - cursor < 6 || *cursor != '\\' || + *(cursor + 1) != 'u' > 0xFFFF) { + code_point = substitution_code_point; + } else { // we have \u following the high surrogate + cursor += 2; // skip \u + uint32_t code_point_2 = hex_to_u32(); + if (code_point_2 > 0xFFFF) { + return false; + } + uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + // Not a valid low surrogate + code_point = substitution_code_point; + } else { + code_point = ((code_point - 0xd800) << 10 | low_bit) + 0x10000; + } + } + } else if (code_point >= 0xdc00 && code_point < 0xe000) { + // Isolated low surrogate, invalid + code_point = substitution_code_point; + } + // Now we have the final code point, write it as UTF-8 to out. + return codepoint_to_utf8(code_point); + }; + + while (cursor != end && *cursor != '"') { + // If we find the end of input before closing quote, it's an error + if (cursor == end) { + simdjson_consteval_error("Unterminated string"); + } + // capture the next character and move forward + char c = *(cursor++); + // The one case where we don't want to append to the string directly + // is when the string contains escape sequences. + + if (c == '\\') { + // Assume that it is the start of an escape sequence + size_t how_many_backslashes = 1; + while (cursor != end && *(cursor) == '\\') { + cursor++; + how_many_backslashes++; + } + size_t backslashes_to_add = how_many_backslashes / 2; + // Append the backslashes + for (size_t i = 0; i < backslashes_to_add; i++) { + out += '\\'; + } + // If we have an odd number of backslashes, we have an escape sequence + // besides the backslashes we have already added. + if (how_many_backslashes % 2 == 1) { + // If we have an odd number of backslashes, we must be in an escape + // sequence check that what follows is a valid escape character + if (cursor == end) { + simdjson_consteval_error("Truncated escape sequence in string"); + } + char next_char = *cursor; + cursor++; + switch (next_char) { + case '"': + out += '"'; + break; + case '/': + out += '/'; + break; + case 'b': + out += '\b'; + break; + case 'f': + out += '\f'; + break; + case 'n': + out += '\n'; + break; + case 'r': + out += '\r'; + break; + case 't': + out += '\t'; + break; + case 'u': + if (!process_escaped_unicode()) { + simdjson_consteval_error( + "Invalid unicode escape sequence in string"); + } + break; + default: + simdjson_consteval_error("Invalid escape character in string"); + } + } + continue; // continue to next iteration + } + // Handle escape sequences and UTF-8 validation. We do not process + // the escape sequences here, just validate them. + out += c; + if (static_cast(c) < 0x20) { + simdjson_consteval_error("Invalid control character in string"); + } + if (static_cast(c) >= 0x80) { + // We have a non-ASCII character inside a string + // We must validate it. + uint8_t first_byte = static_cast(c); + + if ((first_byte & 0b11100000) == 0b11000000) { + if (cursor == end) { + simdjson_consteval_error("Truncated UTF-8 sequence in string"); + } + + char second_byte = *cursor; + out += second_byte; + ++cursor; + if ((static_cast(second_byte) & 0b11000000) != 0b10000000) { + simdjson_consteval_error("Invalid UTF-8 continuation byte in string"); + } + // range check + uint32_t code_point = (first_byte & 0b00011111) << 6 | + (static_cast(second_byte) & 0b00111111); + if ((code_point < 0x80) || (0x7ff < code_point)) { + simdjson_consteval_error("Invalid UTF-8 code point in string"); + } + } else if ((first_byte & 0b11110000) == 0b11100000) { + if (cursor == end) { + simdjson_consteval_error("Truncated UTF-8 sequence in string"); + } + char second_byte = *cursor; + ++cursor; + out += second_byte; + if ((static_cast(second_byte) & 0b11000000) != 0b10000000) { + simdjson_consteval_error("Invalid UTF-8 continuation byte in string"); + } + if (cursor == end) { + simdjson_consteval_error("Truncated UTF-8 sequence in string"); + } + char third_byte = *cursor; + ++cursor; + out += third_byte; + if ((static_cast(third_byte) & 0b11000000) != 0b10000000) { + simdjson_consteval_error("Invalid UTF-8 continuation byte in string"); + } + // range check + uint32_t code_point = (first_byte & 0b00001111) << 12 | + (static_cast(second_byte) & 0b00111111) + << 6 | + (static_cast(third_byte) & 0b00111111); + if ((code_point < 0x800) || (0xffff < code_point) || + (0xd7ff < code_point && code_point < 0xe000)) { + simdjson_consteval_error("Invalid UTF-8 code point in string"); + } + } else if ((first_byte & 0b11111000) == 0b11110000) { // 0b11110000 + if (cursor == end) { + simdjson_consteval_error("Truncated UTF-8 sequence in string"); + } + char second_byte = *cursor; + ++cursor; + out += second_byte; + if (cursor == end) { + simdjson_consteval_error("Truncated UTF-8 sequence in string"); + } + char third_byte = *cursor; + ++cursor; + out += third_byte; + if (cursor == end) { + simdjson_consteval_error("Truncated UTF-8 sequence in string"); + } + char fourth_byte = *cursor; + ++cursor; + out += fourth_byte; + if ((static_cast(second_byte) & 0b11000000) != 0b10000000) { + simdjson_consteval_error("Invalid UTF-8 continuation byte in string"); + } + if ((static_cast(third_byte) & 0b11000000) != 0b10000000) { + simdjson_consteval_error("Invalid UTF-8 continuation byte in string"); + } + if ((static_cast(fourth_byte) & 0b11000000) != 0b10000000) { + simdjson_consteval_error("Invalid UTF-8 continuation byte in string"); + } + // range check + uint32_t code_point = + (first_byte & 0b00000111) << 18 | + (static_cast(second_byte) & 0b00111111) << 12 | + (static_cast(third_byte) & 0b00111111) << 6 | + (static_cast(fourth_byte) & 0b00111111); + if (code_point <= 0xffff || 0x10ffff < code_point) { + simdjson_consteval_error("Invalid UTF-8 code point in string"); + } + } else { + // we have a continuation + simdjson_consteval_error("Invalid UTF-8 continuation byte in string"); + } + continue; + } + } + if (cursor == end) { + simdjson_consteval_error("Unterminated string"); + } + if (*cursor != '"') { + simdjson_consteval_error("Internal error: expected closing quote"); + } + cursor++; // consume the closing quote + // We get here if and only if we have seen the closing quote. + + return {out, size_t(cursor - json.begin())}; } -} // namespace ondemand -} // namespace lasx -} // namespace simdjson +//////////////////////////////////////////////////// +/// ARRAY PARSING +//////////////////////////////////////////////////// -namespace simdjson { +// Parses a JSON array and returns a std::meta::info representing the array as +// well as the number of characters consumed. +consteval std::pair +parse_json_array_impl(const std::string_view json) { + size_t consumed = 0; + auto cursor = json.begin(); + auto end = json.end(); + auto is_whitespace = [](char c) { + return c == ' ' || c == '\n' || c == '\t' || c == '\r'; + }; -simdjson_inline simdjson_result::simdjson_result(lasx::ondemand::value_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} + auto skip_whitespace = [&]() -> void { + while (cursor != end && is_whitespace(*cursor)) + cursor++; + }; -} // namespace simdjson + auto expect_consume = [&] [[nodiscard]] (char c) -> bool { + skip_whitespace(); + if (cursor == end || *(cursor++) != c) { + return false; + }; + return true; + }; -#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H -/* end file simdjson/generic/ondemand/value_iterator-inl.h for lasx */ + if (!expect_consume('[')) { + simdjson_consteval_error("Expected '['"); + } + std::vector values = {^^void}; + skip_whitespace(); + if (cursor != end && *cursor == ']') { + if (!expect_consume(']')) { + simdjson_consteval_error("Expected ']'"); + } + // Empty array - use int as placeholder type since void doesn't work + auto array_type = std::meta::substitute( + ^^std::array, { + ^^int, std::meta::reflect_constant(0uz)}); + values[0] = array_type; + consumed = size_t(cursor - json.begin()); + if (json[consumed - 1] != ']') { + simdjson_consteval_error("Expected ']'"); + } + return {std::meta::substitute(^^construct_from, values), consumed}; + } + while (cursor != end && *cursor != ']') { + char c = *cursor; + switch (c) { + case '{': { + std::string_view value(cursor, end); + auto [parsed, object_size] = parse_json_object_impl(value); + if (*(cursor + object_size - 1) != '}') { + simdjson_consteval_error("Expected '}'"); + } + values.push_back(parsed); + cursor += object_size; + break; + } + case '[': { + std::string_view value(cursor, end); + auto [parsed, array_size] = parse_json_array_impl(value); + if (*(cursor + array_size - 1) != ']') { + simdjson_consteval_error("Expected ']'"); + } + values.push_back(parsed); + cursor += array_size; + break; + } + case '"': { + auto res = parse_string(std::string_view(cursor, end)); + cursor += res.second; + for (char ch : res.first) { + if (ch == '\0') { + simdjson_consteval_error( + "Field string values cannot contain embedded nulls"); + } + } + values.push_back(std::meta::reflect_constant_string(res.first)); + break; + } + case 't': { + if (end - cursor < 4 || std::string_view(cursor, 4) != "true") { + simdjson_consteval_error("Invalid value"); + } + cursor += 4; + values.push_back(std::meta::reflect_constant(true)); + break; + } + case 'f': { + if (end - cursor < 5 || std::string_view(cursor, 5) != "false") { + simdjson_consteval_error("Invalid value"); + } + cursor += 5; + values.push_back(std::meta::reflect_constant(false)); + break; + } + case 'n': { + if (end - cursor < 4 || std::string_view(cursor, 4) != "null") { + simdjson_consteval_error("Invalid value"); + } + cursor += 4; + values.push_back(std::meta::reflect_constant(nullptr)); + break; + } + // We deliberately do not include '+' here, as per JSON spec + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + std::string_view suffix = std::string_view(cursor, end); + std::variant out; + size_t r = parse_number(suffix, out); + cursor += r; + if (std::holds_alternative(out)) { + int64_t int_value = std::get(out); + values.push_back(std::meta::reflect_constant(int_value)); + } else if (std::holds_alternative(out)) { + uint64_t uint_value = std::get(out); + values.push_back(std::meta::reflect_constant(uint_value)); + } else { + double float_value = std::get(out); + values.push_back(std::meta::reflect_constant(float_value)); + } + break; + } + default: + simdjson_consteval_error("Invalid character starting value"); + } + skip_whitespace(); + if (cursor != end && *cursor == ',') { + ++cursor; + skip_whitespace(); + } + } -/* end file simdjson/generic/ondemand/amalgamated.h for lasx */ -/* including simdjson/lasx/end.h: #include "simdjson/lasx/end.h" */ -/* begin file simdjson/lasx/end.h */ -/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ -/* amalgamation skipped (editor-only): #include "simdjson/lasx/base.h" */ -/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + if (!expect_consume(']')) { + simdjson_consteval_error("Expected ']'"); + } + std::size_t count = values.size() - 1; + // We assume all elements have the same type as the first element. + // However, if the array is heterogeneous, we should use std::variant. + auto array_type = std::meta::substitute( + ^^std::array, + { + std::meta::type_of(values[1]), std::meta::reflect_constant(count)}); -#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT -/* undefining SIMDJSON_IMPLEMENTATION from "lasx" */ -#undef SIMDJSON_IMPLEMENTATION -/* end file simdjson/lasx/end.h */ + // Create array instance with values + values[0] = array_type; + consumed = size_t(cursor - json.begin()); + if (json[consumed - 1] != ']') { + simdjson_consteval_error("Expected ']'"); + } + return {std::meta::substitute(^^construct_from, values), consumed}; +} -#endif // SIMDJSON_LASX_ONDEMAND_H -/* end file simdjson/lasx/ondemand.h */ -#else -#error Unknown SIMDJSON_BUILTIN_IMPLEMENTATION -#endif +//////////////////////////////////////////////////// +/// OBJECT PARSING +//////////////////////////////////////////////////// -/* undefining SIMDJSON_CONDITIONAL_INCLUDE */ -#undef SIMDJSON_CONDITIONAL_INCLUDE +// Parses a JSON object and returns a std::meta::info representing the object +// type as well as the number of characters consumed. +consteval std::pair +parse_json_object_impl(std::string_view json) { + size_t consumed = 0; + auto cursor = json.begin(); + auto end = json.end(); -namespace simdjson { - /** - * @copydoc simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand - */ - namespace ondemand = SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand; -} // namespace simdjson + auto skip_whitespace = [&]() -> void { + while (cursor != end && is_whitespace(*cursor)) + cursor++; + }; -#endif // SIMDJSON_BUILTIN_ONDEMAND_H -/* end file simdjson/builtin/ondemand.h */ + auto expect_consume = [&] [[nodiscard]] (char c) -> bool { + skip_whitespace(); + if (cursor == end || *(cursor++) != c) { + return false; + }; + return true; + }; -namespace simdjson { - /** - * @copydoc simdjson::builtin::ondemand - */ - namespace ondemand = builtin::ondemand; + if (!expect_consume('{')) { + simdjson_consteval_error("Expected '{'"); + } + + std::vector members; + std::vector values = {^^void}; + + while (cursor != end && *cursor != '}') { + skip_whitespace(); + // Not all strings can be identifiers, but in JSON field names can be any + // string. Thus we may have a problem if the field name contains characters + // not allowed in identifiers. Let the standard library handle that case. + auto field = parse_string(std::string_view(cursor, end)); + std::string field_name = field.first; + cursor += field.second; + if (!expect_consume(':')) { + simdjson_consteval_error("Expected ':'"); + } + skip_whitespace(); + if (cursor == end) { + simdjson_consteval_error("Expected value after colon"); + } + char c = *cursor; + switch (c) { + case '{': { + std::string_view value(cursor, end); + auto [parsed, object_size] = parse_json_object_impl(value); + if (*(cursor + object_size - 1) != '}') { + simdjson_consteval_error("Expected '}'"); + } + cursor += object_size; + auto dms = std::meta::data_member_spec(std::meta::type_of(parsed), + {.name = field_name}); + members.push_back(std::meta::reflect_constant(dms)); + values.push_back(parsed); + + break; + } + case '[': { + std::string_view value(cursor, end); + auto [parsed, array_size] = parse_json_array_impl(value); + auto dms = std::meta::data_member_spec(std::meta::type_of(parsed), + {.name = field_name}); + members.push_back(std::meta::reflect_constant(dms)); + values.push_back(parsed); + if (*(cursor + array_size - 1) != ']') { + simdjson_consteval_error("Expected ']'"); + } + cursor += array_size; + + break; + } + case '"': { + auto res = parse_string(std::string_view(cursor, end)); + std::string_view value = res.first; + cursor += res.second; + for (char ch : value) { + if (ch == '\0') { + simdjson_consteval_error( + "Field string values cannot contain embedded nulls"); + } + } + auto dms = + std::meta::data_member_spec(^^const char *, { + .name = field_name}); + members.push_back(std::meta::reflect_constant(dms)); + values.push_back(std::meta::reflect_constant_string(value)); + break; + } + case 't': { + if (end - cursor < 4 || std::string_view(cursor, 4) != "true") { + simdjson_consteval_error("Invalid value"); + } + cursor += 4; + + auto dms = std::meta::data_member_spec(^^bool, { + .name = field_name}); + members.push_back(std::meta::reflect_constant(dms)); + values.push_back(std::meta::reflect_constant(true)); + break; + } + case 'f': { + if (end - cursor < 5 || std::string_view(cursor, 5) != "false") { + simdjson_consteval_error("Invalid value"); + } + cursor += 5; + + auto dms = std::meta::data_member_spec(^^bool, { + .name = field_name}); + members.push_back(std::meta::reflect_constant(dms)); + values.push_back(std::meta::reflect_constant(false)); + break; + } + case 'n': { + if (end - cursor < 4 || std::string_view(cursor, 4) != "null") { + simdjson_consteval_error("Invalid value"); + } + cursor += 4; + + auto dms = std::meta::data_member_spec(^^std::nullptr_t, + { + .name = field_name}); + members.push_back(std::meta::reflect_constant(dms)); + values.push_back(std::meta::reflect_constant(nullptr)); + break; + } + // We deliberately do not include '+' here, as per JSON spec + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + std::string_view suffix = std::string_view(cursor, end); + std::variant out; + size_t r = parse_number(suffix, out); + cursor += r; + if (std::holds_alternative(out)) { + int64_t int_value = std::get(out); + auto dms = + std::meta::data_member_spec(^^int64_t, { + .name = field_name}); + members.push_back(std::meta::reflect_constant(dms)); + values.push_back(std::meta::reflect_constant(int_value)); + } else if (std::holds_alternative(out)) { + uint64_t uint_value = std::get(out); + auto dms = + std::meta::data_member_spec(^^uint64_t, { + .name = field_name}); + members.push_back(std::meta::reflect_constant(dms)); + values.push_back(std::meta::reflect_constant(uint_value)); + } else { + double float_value = std::get(out); + auto dms = + std::meta::data_member_spec(^^double, { + .name = field_name}); + members.push_back(std::meta::reflect_constant(dms)); + values.push_back(std::meta::reflect_constant(float_value)); + } + break; + } + default: + simdjson_consteval_error("Invalid character starting value"); + } + skip_whitespace(); + if (cursor == end) { + simdjson_consteval_error("Expected '}' or ','"); + } + if (*cursor == ',') { + ++cursor; + skip_whitespace(); + } else if (*cursor != '}') { + simdjson_consteval_error("Expected '}'"); + } + } + + if (!expect_consume('}')) { + simdjson_consteval_error("Expected '}'"); + } + values[0] = std::meta::substitute(^^class_type, members); + consumed = size_t(cursor - json.begin()); + if (json[consumed - 1] != '}') { + simdjson_consteval_error("Expected '}'"); + } + return {std::meta::substitute(^^construct_from, values), consumed}; +} + +/** + * Our public function to parse JSON at compile time. + * @brief Compile-time JSON parser. This function parses the provided JSON + * string at compile time and returns a custom struct type representing the JSON + * object. + */ +template consteval auto parse_json() { + constexpr std::string_view json = trim_whitespace(json_str.view()); + static_assert(!json.empty(), "JSON string cannot be empty"); + /*static_assert(json.front() != '{' && json.front() != '[', + "Only JSON objects and arrays are supported at the top level, this " + "limitation will be lifted in the future.");*/ + + constexpr auto result = json.front() == '[' + ? parse_json_array_impl(json) + : parse_json_object_impl(json); + return [: result.first :]; + /* + if(json.front() == '[') { + return [:parse_json_array_impl(json).first:]; + } else if(json.front() == '{') { + // return [:parse_json_object_impl(json).first:]; + }*/ +} + +} // namespace compile_time } // namespace simdjson -#endif // SIMDJSON_ONDEMAND_H -/* end file simdjson/ondemand.h */ +#endif // SIMDJSON_STATIC_REFLECTION +#endif // SIMDJSON_GENERIC_COMPILE_TIME_JSON_INL_H +/* end file simdjson/compile_time_json-inl.h */ + #endif // SIMDJSON_H /* end file simdjson.h */ diff --git a/tests/test_parser.py b/tests/test_parser.py index d5d1916..0bc95e5 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -72,11 +72,13 @@ def test_implementation(): with pytest.raises(ValueError): parser.implementation = 'rubbish' - # The generic, always-available Implementation. - parser.implementation = 'fallback' - parser.parse(b'{"hello": "world"}') - - assert parser.implementation[0] == 'fallback' - + # The generic implementation is only available on systems where it is + # potentially needed, or when you force it to be available. + # The cost of forcing the generic implementation is that you then + # have the runtime dispatching overhead (it is tiny but not zero) and + # a larger binary. implementations = [imp[0] for imp in parser.get_implementations()] - assert 'fallback' in implementations + for imp in implementations: + parser.implementation = imp + parser.parse(b'{"hello": "world"}') + assert parser.implementation[0] == imp