diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..4308d822046df995d72605f3497d02ae5ae307d8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +target/ +**/*.rs.bk +Cargo.lock diff --git a/BUILD.gn b/BUILD.gn new file mode 100644 index 0000000000000000000000000000000000000000..c25f3d63794142d44bffc1f5f319d0ea62d87896 --- /dev/null +++ b/BUILD.gn @@ -0,0 +1,27 @@ +# Copyright (c) 2023 Huawei Device Co., Ltd. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import("//build/templates/rust/ohos_cargo_crate.gni") + +ohos_cargo_crate("lib") { + crate_name = "arbitrary" + crate_type = "rlib" + output_name = "liblibfuzzer" + visibility = ["*"] + crate_root = "src/lib.rs" + sources = "src/lib.rs" + edition = "2018" + features = [ + "derive", + ] +} \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000000000000000000000000000000000000..f497c2907295a19d5a8a95e569463f6ba770d5b0 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,476 @@ +## Unreleased + +Released YYYY-MM-DD. + +### Added + +* TODO (or remove section if none) + +### Changed + +* TODO (or remove section if none) + +### Deprecated + +* TODO (or remove section if none) + +### Removed + +* TODO (or remove section if none) + +### Fixed + +* TODO (or remove section if none) + +### Security + +* TODO (or remove section if none) + +-------------------------------------------------------------------------------- + +## 1.3.0 + +Released 2023-03-13. + +### Added + +* Added the ability to manually specify derived trait bounds for + `Arbitrary`. See [#138](https://github.com/rust-fuzz/arbitrary/pull/138) for + details. + +### Fixed + +* Fixed minimal versions correctness for `syn`. + +-------------------------------------------------------------------------------- + +## 1.2.3 + +Released 2023-01-20. + +### Fixed + +* The `derive(Arbitrary)` will now annotate the generated `impl`s with a `#[automatically_derived]` + attribute to indicate to e.g. clippy that lints should not fire for the code within the derived + implementation. + +## 1.2.2 + +Released 2023-01-03. + +### Fixed + +* Ensured that `arbitrary` and `derive_arbitrary` versions are synced up so that + they don't, e.g., emit generated code that depends on newer versions of + `arbitrary` than the one currently in + use. [#134](https://github.com/rust-fuzz/arbitrary/issues/134) + +## 1.2.1 + +### Fixed + +* Fixed an issue where `std::thread_local!` macro invocations in derive code + were not fully prefixed, causing confusing build errors in certain situations. + +## 1.2.0 + +Released 2022-10-20. + +### Added + +* Support custom arbitrary implementation for fields on + derive. [#129](https://github.com/rust-fuzz/arbitrary/pull/129) + +-------------------------------------------------------------------------------- + +## 1.1.6 + +Released 2022-09-20. + +### Fixed + +* Fixed a potential panic due to an off-by-one error in the `Arbitrary` + implementation for `std::ops::Bound`. + +-------------------------------------------------------------------------------- + +## 1.1.5 + +Released 2022-09-08. + +### Added + +* Implemented `Arbitrary` for `std::ops::Bound`. + +### Fixed + +* Fixed a bug where `Unstructured::int_in_range` could return out-of-range + integers when generating arbitrary signed integers. + +-------------------------------------------------------------------------------- + +## 1.1.4 + +Released 2022-08-29. + +### Added + +* Implemented `Arbitrary` for `Rc` and `Arc` + +### Changed + +* Allow overriding the error type in `arbitrary::Result` +* The `Unstructured::arbitrary_loop` method will consume fewer bytes of input + now. + +### Fixed + +* Fixed a bug where `Unstructured::int_in_range` could return out-of-range + integers. + +-------------------------------------------------------------------------------- + +## 1.1.3 + +Released 2022-06-23. + +### Fixed + +* Fixed some potential (but highly unlikely) name-clashes inside + `derive(Arbitrary)`'s generated + code. [#111](https://github.com/rust-fuzz/arbitrary/pull/111) +* Fixed an edge case where `derive(Arbitrary)` for recursive types that detected + an overflow would not reset the overflow + detection. [#111](https://github.com/rust-fuzz/arbitrary/pull/111) + +-------------------------------------------------------------------------------- + +## 1.1.2 + +Released 2022-06-16. + +### Fixed + +* Fixed a warning inside `derive(Arbitrary)`-generated + code. [#110](https://github.com/rust-fuzz/arbitrary/pull/110) + +-------------------------------------------------------------------------------- + +## 1.1.1 + +Released 2022-06-14. + +### Fixed + +* Fixed a stack overflow when using `derive(Arbitrary)` with recursive types and + empty inputs. [#109](https://github.com/rust-fuzz/arbitrary/pull/109) + +-------------------------------------------------------------------------------- + +## 1.1.0 + +Released 2022-02-09. + +### Added + +* Added the `Unstructured::ratio` method to generate a boolean that is `true` at + the given rate. + +* Added the `Unstructured::arbitrary_loop` method to call a function an + arbitrary number of times. + +-------------------------------------------------------------------------------- + +## 1.0.3 + +Released 2021-11-20. + +### Fixed + +* Fixed documentation for `Unstructured::fill_bytes`. We forgot to update this + way back in [#53](https://github.com/rust-fuzz/arbitrary/pull/53) when the + behavior changed. + +-------------------------------------------------------------------------------- + +## 1.0.2 + +Released 2021-08-25. + +### Added + +* `Arbitrary` impls for `HashMap`s and `HashSet`s with custom `Hasher`s + [#87](https://github.com/rust-fuzz/arbitrary/pull/87) + +-------------------------------------------------------------------------------- + +## 1.0.1 + +Released 2021-05-20. + +### Added + +* `Arbitrary` impls for `NonZeroX` types [#79](https://github.com/rust-fuzz/arbitrary/pull/79) +* `Arbitrary` impls for all arrays using const generics [#55](https://github.com/rust-fuzz/arbitrary/pull/55) +* `Arbitrary` impls for `Ipv4Addr` and `Ipv6Addr` [#84](https://github.com/rust-fuzz/arbitrary/pull/84) + +### Fixed + +* Use fewer bytes for `Unstructured::int_in_range()` [#80](https://github.com/rust-fuzz/arbitrary/pull/80) +* Use correct range for `char` generation [#83](https://github.com/rust-fuzz/arbitrary/pull/83) + +-------------------------------------------------------------------------------- + +## 1.0.0 + +Released 2020-02-24. + +See 1.0.0-rc1 and 1.0.0-rc2 for changes since 0.4.7, which was the last main +line release. + +-------------------------------------------------------------------------------- + +## 1.0.0-rc2 + +Released 2021-02-09. + +### Added + +* The `Arbitrary` trait is now implemented for `&[u8]`. [#67](https://github.com/rust-fuzz/arbitrary/pull/67) + +### Changed + +* Rename `Unstructured#get_bytes` to `Unstructured#bytes`. [#70](https://github.com/rust-fuzz/arbitrary/pull/70) +* Passing an empty slice of choices to `Unstructured#choose` returns an error. Previously it would panic. [71](https://github.com/rust-fuzz/arbitrary/pull/71) + +-------------------------------------------------------------------------------- + +## 1.0.0-rc1 + +Released 2020-11-25. + +### Added + +* The `Arbitrary` trait is now implemented for `&str`. [#63](https://github.com/rust-fuzz/arbitrary/pull/63) + +### Changed + +* The `Arbitrary` trait now has a lifetime parameter, allowing `Arbitrary` implementations that borrow from the raw input (e.g. the new `&str` implementaton). The `derive(Arbitrary)` macro also supports deriving `Arbitrary` on types with lifetimes now. [#63](https://github.com/rust-fuzz/arbitrary/pull/63) + +### Removed + +* The `shrink` method on the `Arbitrary` trait has been removed. + + We have found that, in practice, using [internal reduction](https://drmaciver.github.io/papers/reduction-via-generation-preview.pdf) via approaches like `cargo fuzz tmin`, where the raw input bytes are reduced rather than the `T: Arbitrary` type constructed from those raw bytes, has the best efficiency-to-maintenance ratio. To the best of our knowledge, no one is relying on or using the `Arbitrary::shrink` method. If you *are* using and relying on the `Arbitrary::shrink` method, please reach out by [dropping a comment here](https://github.com/rust-fuzz/arbitrary/issues/62) and explaining how you're using it and what your use case is. We'll figure out what the best solution is, including potentially adding shrinking functionality back to the `arbitrary` crate. + +-------------------------------------------------------------------------------- + +## 0.4.7 + +Released 2020-10-14. + +### Added + +* Added an optimization to avoid unnecessarily consuming bytes from the + underlying data when there is only one possible choice in + `Unstructured::{int_in_range, choose, etc..}`. + +* Added license files to the derive crate. + +### Changed + +* The `Arbitrary` implementation for `std::time::Duration` should now be faster + and produce durations with a more-uniform distribution of nanoseconds. + +-------------------------------------------------------------------------------- + +## 0.4.6 + +Released 2020-08-22. + +### Added + +* Added the `Unstructured::peek_bytes` method. + +### Changed + +* Test case reduction via `cargo fuzz tmin` should be much more effective at + reducing the sizes of collections now. (See + [#53](https://github.com/rust-fuzz/arbitrary/pull/53) and the commit messages + for details.) + +* Fuzzing with mutation-based fuzzers (like libFuzzer) should be more efficient + now. (See [#53](https://github.com/rust-fuzz/arbitrary/pull/53) and the commit + messages for details) + +-------------------------------------------------------------------------------- + +## 0.4.5 + +Released 2020-06-18. + +### Added + +* Implement `Arbitrary` for zero length arrays. +* Implement `Arbitrary` for `Range` and `RangeInclusive`. + +-------------------------------------------------------------------------------- + +## 0.4.4 + +Released 2020-04-29. + +### Fixed + +* Fixed the custom derive for enums when used via its full path (like + `#[derive(arbitrary::Arbitrary)]` rather than like `#[derive(Arbitrary)]`). + + +## 0.4.3 + +Released 2020-04-28. + +### Fixed + +* Fixed the custom derive when used via its full path (like + `#[derive(arbitrary::Arbitrary)]` rather than like `#[derive(Arbitrary)]`). + +-------------------------------------------------------------------------------- + +## 0.4.2 + +Released 2020-04-17. + +### Changed + +* We forgot to release a new version of the `derive_arbitrary` crate last + release. This release fixes that and so the `synstructure` dependency is + finally actually removed in the cargo releases. + +-------------------------------------------------------------------------------- + +## 0.4.1 + +Released 2020-03-18. + +### Removed + +* Removed an internal dependency on the `synstructure` crate when the `derive` + feature is enabled. This should not have any visible downstream effects other + than faster build times! + +-------------------------------------------------------------------------------- + +## 0.4.0 + +Released 2020-01-22. + +This is technically a breaking change, but we expect that nearly everyone should +be able to upgrade without any compilation errors. The only exception is if you +were implementing the `Arbitrary::size_hint` method by hand. If so, see the +"changed" section below and the [API docs for +`Arbitrary::shrink`](https://docs.rs/arbitrary/0.4.0/arbitrary/trait.Arbitrary.html#method.size_hint) +for details. + +### Added + +* Added [the `arbitary::size_hint::recursion_guard` helper + function][recursion_guard] for guarding against infinite recursion in + `size_hint` implementations for recursive types. + +### Changed + +* The `Arbitrary::size_hint` signature now takes a `depth: usize` + parameter. This should be passed along unmodified to any nested calls of other + `size_hint` methods. If you're implementing `size_hint` for a recursive type + (like a linked list or tree) or a generic type with type parameters, you + should use [the new `arbitrary::size_hint::recursion_guard` helper + function][recursion_guard]. + +### Fixed + +* Fixed infinite recursion in generated `size_hint` implementations + from `#[derive(Arbitrary)]` for recursive types. + +[recursion_guard]: https://docs.rs/arbitrary/0.4.0/arbitrary/size_hint/fn.recursion_guard.html + +-------------------------------------------------------------------------------- + +## 0.3.2 + +Released 2020-01-16. + +### Changed + +* Updated the custom derive's dependencies. + +-------------------------------------------------------------------------------- + +## 0.3.2 + +Released 2020-01-15. + +### Fixed + +* Fixed an over-eager assertion condition in `Unstructured::int_in_range` that + would incorrectly trigger when given valid ranges of length one. + +-------------------------------------------------------------------------------- + +## 0.3.1 + +Released 2020-01-14. + +### Fixed + +* Fixed some links and version numbers in README. + +-------------------------------------------------------------------------------- + +## 0.3.0 + +Released 2020-01-14. + +### Added + +* Added the `"derive"` cargo feature, to enable `#[derive(Arbitrary)]` for + custom types. Enabling this feature re-exports functionality from the + `derive_arbitrary` crate. +* The custom derive for `Arbitrary` implements the shrink method for you now. +* All implementations of `Arbitrary` for `std` types implement shrinking now. +* Added the `Arbitrary::arbitrary_take_rest` method allows an `Arbitrary` + implementation to consume all of the rest of the remaining raw input. It has a + default implementation that forwards to `Arbitrary::arbitrary` and the custom + derive creates a smart implementation for your custom types. +* Added the `Arbitrary::size_hint` method for hinting how many raw bytes an + implementation needs to construct itself. This has a default implementation, + but the custom derive creates a smart implementation for your custom types. +* Added the `Unstructured::choose` method to choose one thing among a set of + choices. +* Added the `Unstructured::arbitrary_len` method to get an arbitrary length for + a collection of some arbitrary type. +* Added the `Unstructured::arbitrary_iter` method to create an iterator of + arbitrary instance of some type. + +### Changed + +* The `Arbitrary` trait was simplified a bit. +* `Unstructured` is a concrete type now, not a trait. +* Switched to Rust 2018 edition. + +### Removed + +* `RingBuffer` and `FiniteBuffer` are removed. Use `Unstructured` instead. + +### Fixed + +* Better `Arbitrary` implementation for `char`. +* Better `Arbitrary` implementation for `String`. + +-------------------------------------------------------------------------------- + +## 0.2.0 + +-------------------------------------------------------------------------------- + +## 0.1.0 diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000000000000000000000000000000000000..a7c9a37545d5560da1b57ea46c55bb13367e87b4 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,39 @@ +[package] +name = "arbitrary" +version = "1.3.0" # Make sure this matches the derive crate version (not including the patch version) +authors = [ + "The Rust-Fuzz Project Developers", + "Nick Fitzgerald ", + "Manish Goregaokar ", + "Simonas Kazlauskas ", + "Brian L. Troutwine ", + "Corey Farwell ", +] +categories = ["development-tools::testing"] +edition = "2018" +keywords = ["arbitrary", "testing"] +readme = "README.md" +description = "The trait for generating structured data from unstructured data" +license = "MIT OR Apache-2.0" +repository = "https://github.com/rust-fuzz/arbitrary/" +documentation = "https://docs.rs/arbitrary/" +rust-version = "1.63.0" + +[dependencies] +derive_arbitrary = { version = "1.3.0", path = "./derive", optional = true } + +[features] +# Turn this feature on to enable support for `#[derive(Arbitrary)]`. +derive = ["derive_arbitrary"] + +[[example]] +name = "derive_enum" +required-features = ["derive"] + +[[test]] +name = "derive" +path = "./tests/derive.rs" +required-features = ["derive"] + +[workspace] +members = ["./fuzz"] diff --git a/LICENSE-APACHE b/LICENSE-APACHE new file mode 100644 index 0000000000000000000000000000000000000000..16fe87b06e802f094b3fbb0894b137bca2b16ef1 --- /dev/null +++ b/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/LICENSE-MIT b/LICENSE-MIT new file mode 100644 index 0000000000000000000000000000000000000000..d74f9e93d4c0f2fc863815a7631d5ad840b05002 --- /dev/null +++ b/LICENSE-MIT @@ -0,0 +1,27 @@ +MIT License + +Copyright (c) 2019 Manish Goregaokar + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/OAT.xml b/OAT.xml new file mode 100644 index 0000000000000000000000000000000000000000..4fb316396cfdfbd6b18ded8442687c5b1e6b983b --- /dev/null +++ b/OAT.xml @@ -0,0 +1,77 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/README.en.md b/README.en.md deleted file mode 100644 index 8263b108f72d24202c9efe5a36e7f0ba167f3c30..0000000000000000000000000000000000000000 --- a/README.en.md +++ /dev/null @@ -1,36 +0,0 @@ -# third_party_rust_arbitrary - -#### Description -Rust Fuzz测试用例编译依赖三方库。 - -#### Software Architecture -Software architecture description - -#### Installation - -1. xxxx -2. xxxx -3. xxxx - -#### Instructions - -1. xxxx -2. xxxx -3. xxxx - -#### Contribution - -1. Fork the repository -2. Create Feat_xxx branch -3. Commit your code -4. Create Pull Request - - -#### Gitee Feature - -1. You can use Readme\_XXX.md to support different languages, such as Readme\_en.md, Readme\_zh.md -2. Gitee blog [blog.gitee.com](https://blog.gitee.com) -3. Explore open source project [https://gitee.com/explore](https://gitee.com/explore) -4. The most valuable open source project [GVP](https://gitee.com/gvp) -5. The manual of Gitee [https://gitee.com/help](https://gitee.com/help) -6. The most popular members [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/) diff --git a/README.md b/README.md index 2389c1a6d90babe019c9ef049293bacdd7c33e0c..c82309b20b7f7171e107a9f803a3d870ce932d7a 100644 --- a/README.md +++ b/README.md @@ -1,37 +1,129 @@ -# third_party_rust_arbitrary +
-#### 介绍 -Rust Fuzz测试用例编译依赖三方库。 +

Arbitrary

-#### 软件架构 -软件架构说明 +

The trait for generating structured data from arbitrary, unstructured input.

+ GitHub Actions Status -#### 安装教程 +
-1. xxxx -2. xxxx -3. xxxx +## About -#### 使用说明 +The `Arbitrary` crate lets you construct arbitrary instances of a type. -1. xxxx -2. xxxx -3. xxxx +This crate is primarily intended to be combined with a fuzzer like [libFuzzer +and `cargo-fuzz`](https://github.com/rust-fuzz/cargo-fuzz) or +[AFL](https://github.com/rust-fuzz/afl.rs), and to help you turn the raw, +untyped byte buffers that they produce into well-typed, valid, structured +values. This allows you to combine structure-aware test case generation with +coverage-guided, mutation-based fuzzers. -#### 参与贡献 +## Documentation -1. Fork 本仓库 -2. 新建 Feat_xxx 分支 -3. 提交代码 -4. 新建 Pull Request +[**Read the API documentation on `docs.rs`!**](https://docs.rs/arbitrary) +## Example -#### 特技 +Say you're writing a color conversion library, and you have an `Rgb` struct to +represent RGB colors. You might want to implement `Arbitrary` for `Rgb` so that +you could take arbitrary `Rgb` instances in a test function that asserts some +property (for example, asserting that RGB converted to HSL and converted back to +RGB always ends up exactly where we started). -1. 使用 Readme\_XXX.md 来支持不同的语言,例如 Readme\_en.md, Readme\_zh.md -2. Gitee 官方博客 [blog.gitee.com](https://blog.gitee.com) -3. 你可以 [https://gitee.com/explore](https://gitee.com/explore) 这个地址来了解 Gitee 上的优秀开源项目 -4. [GVP](https://gitee.com/gvp) 全称是 Gitee 最有价值开源项目,是综合评定出的优秀开源项目 -5. Gitee 官方提供的使用手册 [https://gitee.com/help](https://gitee.com/help) -6. Gitee 封面人物是一档用来展示 Gitee 会员风采的栏目 [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/) +### Automatically Deriving `Arbitrary` + +Automatically deriving the `Arbitrary` trait is the recommended way to implement +`Arbitrary` for your types. + +Automatically deriving `Arbitrary` requires you to enable the `"derive"` cargo +feature: + +```toml +# Cargo.toml + +[dependencies] +arbitrary = { version = "1", features = ["derive"] } +``` + +And then you can simply add `#[derive(Arbitrary)]` annotations to your types: + +```rust +// rgb.rs + +use arbitrary::Arbitrary; + +#[derive(Arbitrary)] +pub struct Rgb { + pub r: u8, + pub g: u8, + pub b: u8, +} +``` + +#### Customizing single fields + +This can be particular handy if your structure uses a type that does not implement `Arbitrary` or you want to have more customization for particular fields. + +```rust +#[derive(Arbitrary)] +pub struct Rgba { + // set `r` to Default::default() + #[arbitrary(default)] + pub r: u8, + + // set `g` to 255 + #[arbitrary(value = 255)] + pub g: u8, + + // Generate `b` with a custom function of type + // + // fn(&mut Unstructured) -> arbitrary::Result + // + // where `T` is the field's type. + #[arbitrary(with = arbitrary_b)] + pub b: u8, + + // Generate `a` with a custom closure (shortuct to avoid a custom function) + #[arbitrary(with = |u: &mut Unstructured| u.int_in_range(0..=64))] + pub a: u8, +} + +fn arbitrary_b(u: &mut Unstructured) -> arbitrary::Result { + u.int_in_range(64..=128) +} +``` + +### Implementing `Arbitrary` By Hand + +Alternatively, you can write an `Arbitrary` implementation by hand: + +```rust +// rgb.rs + +use arbitrary::{Arbitrary, Result, Unstructured}; + +#[derive(Copy, Clone, Debug)] +pub struct Rgb { + pub r: u8, + pub g: u8, + pub b: u8, +} + +impl<'a> Arbitrary<'a> for Rgb { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + let r = u8::arbitrary(u)?; + let g = u8::arbitrary(u)?; + let b = u8::arbitrary(u)?; + Ok(Rgb { r, g, b }) + } +} +``` + +## License + +Licensed under dual MIT or Apache-2.0 at your choice. + +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in this project by you, as defined in the Apache-2.0 license, +shall be dual licensed as above, without any additional terms or conditions. diff --git a/bundle.json b/bundle.json new file mode 100644 index 0000000000000000000000000000000000000000..d186069a033992de3cf7ee004de2a007ebfc178b --- /dev/null +++ b/bundle.json @@ -0,0 +1,31 @@ +{ + "name": "@ohos/arbitrary", + "description": "The trait for generating structured data from arbitrary, unstructured input.", + "version": "v1.3.0", + "license": "Apache 2.0", + "publishAs": "code-segment", + "segment": { + "destPath": "third_party/arbitrary" + }, + "dirs": {}, + "scripts": {}, + "licensePath": "COPYING", + "component": { + "name": "arbitrary", + "subsystem": "thirdparty", + "syscap": [], + "features": [], + "adapted_system_type": [], + "rom": "", + "ram": "", + "deps": { + "components": [], + "third_party": [] + }, + "build": { + "sub_component": [], + "inner_kits": [], + "test": [] + } + } +} diff --git a/derive/Cargo.toml b/derive/Cargo.toml new file mode 100644 index 0000000000000000000000000000000000000000..aa90dbb4afca3063d6a33ac7dd79949b5af54e31 --- /dev/null +++ b/derive/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "derive_arbitrary" +version = "1.3.1" # Make sure it matches the version of the arbitrary crate itself (not including the patch version) +authors = [ + "The Rust-Fuzz Project Developers", + "Nick Fitzgerald ", + "Manish Goregaokar ", + "Andre Bogus ", + "Corey Farwell ", +] +categories = ["development-tools::testing"] +edition = "2021" +keywords = ["arbitrary", "testing", "derive", "macro"] +readme = "README.md" +description = "Derives arbitrary traits" +license = "MIT/Apache-2.0" +repository = "https://github.com/rust-fuzz/arbitrary" +documentation = "https://docs.rs/arbitrary/" +rust-version = "1.63.0" + +[dependencies] +proc-macro2 = "1.0" +quote = "1.0" +syn = { version = "2", features = ['derive', 'parsing'] } + +[lib] +proc_macro = true diff --git a/derive/LICENSE-APACHE b/derive/LICENSE-APACHE new file mode 100644 index 0000000000000000000000000000000000000000..16fe87b06e802f094b3fbb0894b137bca2b16ef1 --- /dev/null +++ b/derive/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/derive/LICENSE-MIT b/derive/LICENSE-MIT new file mode 100644 index 0000000000000000000000000000000000000000..d74f9e93d4c0f2fc863815a7631d5ad840b05002 --- /dev/null +++ b/derive/LICENSE-MIT @@ -0,0 +1,27 @@ +MIT License + +Copyright (c) 2019 Manish Goregaokar + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/derive/README.md b/derive/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f48c75de1ac3c839f860a291e801539580e848f --- /dev/null +++ b/derive/README.md @@ -0,0 +1,7 @@ +# `#[derive(Arbitrary)]` + +This crate implements support for automatically deriving [the `Arbitrary` +trait](https://docs.rs/arbitrary/*/arbitrary/trait.Arbitrary.html). + +Don't depend on this crate directly, though. Instead, enable the `"derive"` +feature of the `arbitrary` crate. diff --git a/derive/src/container_attributes.rs b/derive/src/container_attributes.rs new file mode 100644 index 0000000000000000000000000000000000000000..269131c88474078e1302daefc7056a8aa408772f --- /dev/null +++ b/derive/src/container_attributes.rs @@ -0,0 +1,78 @@ +use crate::ARBITRARY_ATTRIBUTE_NAME; +use syn::{ + parse::Error, punctuated::Punctuated, DeriveInput, Expr, ExprLit, Lit, Meta, MetaNameValue, + Token, TypeParam, +}; + +pub struct ContainerAttributes { + /// Specify type bounds to be applied to the derived `Arbitrary` implementation instead of the + /// default inferred bounds. + /// + /// ```ignore + /// #[arbitrary(bound = "T: Default, U: Debug")] + /// ``` + /// + /// Multiple attributes will be combined as long as they don't conflict, e.g. + /// + /// ```ignore + /// #[arbitrary(bound = "T: Default")] + /// #[arbitrary(bound = "U: Default")] + /// ``` + pub bounds: Option>>, +} + +impl ContainerAttributes { + pub fn from_derive_input(derive_input: &DeriveInput) -> Result { + let mut bounds = None; + + for attr in &derive_input.attrs { + if !attr.path().is_ident(ARBITRARY_ATTRIBUTE_NAME) { + continue; + } + + let meta_list = match attr.meta { + Meta::List(ref l) => l, + _ => { + return Err(Error::new_spanned( + attr, + format!( + "invalid `{}` attribute. expected list", + ARBITRARY_ATTRIBUTE_NAME + ), + )) + } + }; + + for nested_meta in + meta_list.parse_args_with(Punctuated::::parse_terminated)? + { + match nested_meta { + Meta::NameValue(MetaNameValue { + path, + value: + Expr::Lit(ExprLit { + lit: Lit::Str(bound_str_lit), + .. + }), + .. + }) if path.is_ident("bound") => { + bounds + .get_or_insert_with(Vec::new) + .push(bound_str_lit.parse_with(Punctuated::parse_terminated)?); + } + _ => { + return Err(Error::new_spanned( + attr, + format!( + "invalid `{}` attribute. expected `bound = \"..\"`", + ARBITRARY_ATTRIBUTE_NAME, + ), + )) + } + } + } + } + + Ok(Self { bounds }) + } +} diff --git a/derive/src/field_attributes.rs b/derive/src/field_attributes.rs new file mode 100644 index 0000000000000000000000000000000000000000..9e5922e04b3fb65a032ec2848125f22dd8fa68d8 --- /dev/null +++ b/derive/src/field_attributes.rs @@ -0,0 +1,105 @@ +use crate::ARBITRARY_ATTRIBUTE_NAME; +use proc_macro2::{Span, TokenStream, TokenTree}; +use quote::quote; +use syn::{spanned::Spanned, *}; + +/// Determines how a value for a field should be constructed. +#[cfg_attr(test, derive(Debug))] +pub enum FieldConstructor { + /// Assume that Arbitrary is defined for the type of this field and use it (default) + Arbitrary, + + /// Places `Default::default()` as a field value. + Default, + + /// Use custom function or closure to generate a value for a field. + With(TokenStream), + + /// Set a field always to the given value. + Value(TokenStream), +} + +pub fn determine_field_constructor(field: &Field) -> Result { + let opt_attr = fetch_attr_from_field(field)?; + let ctor = match opt_attr { + Some(attr) => parse_attribute(attr)?, + None => FieldConstructor::Arbitrary, + }; + Ok(ctor) +} + +fn fetch_attr_from_field(field: &Field) -> Result> { + let found_attributes: Vec<_> = field + .attrs + .iter() + .filter(|a| { + let path = a.path(); + let name = quote!(#path).to_string(); + name == ARBITRARY_ATTRIBUTE_NAME + }) + .collect(); + if found_attributes.len() > 1 { + let name = field.ident.as_ref().unwrap(); + let msg = format!( + "Multiple conflicting #[{ARBITRARY_ATTRIBUTE_NAME}] attributes found on field `{name}`" + ); + return Err(syn::Error::new(field.span(), msg)); + } + Ok(found_attributes.into_iter().next()) +} + +fn parse_attribute(attr: &Attribute) -> Result { + if let Meta::List(ref meta_list) = attr.meta { + parse_attribute_internals(meta_list) + } else { + let msg = format!("#[{ARBITRARY_ATTRIBUTE_NAME}] must contain a group"); + Err(syn::Error::new(attr.span(), msg)) + } +} + +fn parse_attribute_internals(meta_list: &MetaList) -> Result { + let mut tokens_iter = meta_list.tokens.clone().into_iter(); + let token = tokens_iter.next().ok_or_else(|| { + let msg = format!("#[{ARBITRARY_ATTRIBUTE_NAME}] cannot be empty."); + syn::Error::new(meta_list.span(), msg) + })?; + match token.to_string().as_ref() { + "default" => Ok(FieldConstructor::Default), + "with" => { + let func_path = parse_assigned_value("with", tokens_iter, meta_list.span())?; + Ok(FieldConstructor::With(func_path)) + } + "value" => { + let value = parse_assigned_value("value", tokens_iter, meta_list.span())?; + Ok(FieldConstructor::Value(value)) + } + _ => { + let msg = format!("Unknown option for #[{ARBITRARY_ATTRIBUTE_NAME}]: `{token}`"); + Err(syn::Error::new(token.span(), msg)) + } + } +} + +// Input: +// = 2 + 2 +// Output: +// 2 + 2 +fn parse_assigned_value( + opt_name: &str, + mut tokens_iter: impl Iterator, + default_span: Span, +) -> Result { + let eq_sign = tokens_iter.next().ok_or_else(|| { + let msg = format!( + "Invalid syntax for #[{ARBITRARY_ATTRIBUTE_NAME}], `{opt_name}` is missing assignment." + ); + syn::Error::new(default_span, msg) + })?; + + if eq_sign.to_string() == "=" { + Ok(tokens_iter.collect()) + } else { + let msg = format!("Invalid syntax for #[{ARBITRARY_ATTRIBUTE_NAME}], expected `=` after `{opt_name}`, got: `{eq_sign}`"); + Err(syn::Error::new(eq_sign.span(), msg)) + } +} diff --git a/derive/src/lib.rs b/derive/src/lib.rs new file mode 100644 index 0000000000000000000000000000000000000000..886bb4924bd177f9a2403520d35c7774a25ce981 --- /dev/null +++ b/derive/src/lib.rs @@ -0,0 +1,403 @@ +extern crate proc_macro; + +use proc_macro2::{Span, TokenStream}; +use quote::quote; +use syn::*; + +mod container_attributes; +mod field_attributes; +use container_attributes::ContainerAttributes; +use field_attributes::{determine_field_constructor, FieldConstructor}; + +static ARBITRARY_ATTRIBUTE_NAME: &str = "arbitrary"; +static ARBITRARY_LIFETIME_NAME: &str = "'arbitrary"; + +#[proc_macro_derive(Arbitrary, attributes(arbitrary))] +pub fn derive_arbitrary(tokens: proc_macro::TokenStream) -> proc_macro::TokenStream { + let input = syn::parse_macro_input!(tokens as syn::DeriveInput); + expand_derive_arbitrary(input) + .unwrap_or_else(syn::Error::into_compile_error) + .into() +} + +fn expand_derive_arbitrary(input: syn::DeriveInput) -> Result { + let container_attrs = ContainerAttributes::from_derive_input(&input)?; + + let (lifetime_without_bounds, lifetime_with_bounds) = + build_arbitrary_lifetime(input.generics.clone()); + + let recursive_count = syn::Ident::new( + &format!("RECURSIVE_COUNT_{}", input.ident), + Span::call_site(), + ); + + let arbitrary_method = + gen_arbitrary_method(&input, lifetime_without_bounds.clone(), &recursive_count)?; + let size_hint_method = gen_size_hint_method(&input)?; + let name = input.ident; + + // Apply user-supplied bounds or automatic `T: ArbitraryBounds`. + let generics = apply_trait_bounds( + input.generics, + lifetime_without_bounds.clone(), + &container_attrs, + )?; + + // Build ImplGeneric with a lifetime (https://github.com/dtolnay/syn/issues/90) + let mut generics_with_lifetime = generics.clone(); + generics_with_lifetime + .params + .push(GenericParam::Lifetime(lifetime_with_bounds)); + let (impl_generics, _, _) = generics_with_lifetime.split_for_impl(); + + // Build TypeGenerics and WhereClause without a lifetime + let (_, ty_generics, where_clause) = generics.split_for_impl(); + + Ok(quote! { + const _: () = { + std::thread_local! { + #[allow(non_upper_case_globals)] + static #recursive_count: std::cell::Cell = std::cell::Cell::new(0); + } + + #[automatically_derived] + impl #impl_generics arbitrary::Arbitrary<#lifetime_without_bounds> for #name #ty_generics #where_clause { + #arbitrary_method + #size_hint_method + } + }; + }) +} + +// Returns: (lifetime without bounds, lifetime with bounds) +// Example: ("'arbitrary", "'arbitrary: 'a + 'b") +fn build_arbitrary_lifetime(generics: Generics) -> (LifetimeParam, LifetimeParam) { + let lifetime_without_bounds = + LifetimeParam::new(Lifetime::new(ARBITRARY_LIFETIME_NAME, Span::call_site())); + let mut lifetime_with_bounds = lifetime_without_bounds.clone(); + + for param in generics.params.iter() { + if let GenericParam::Lifetime(lifetime_def) = param { + lifetime_with_bounds + .bounds + .push(lifetime_def.lifetime.clone()); + } + } + + (lifetime_without_bounds, lifetime_with_bounds) +} + +fn apply_trait_bounds( + mut generics: Generics, + lifetime: LifetimeParam, + container_attrs: &ContainerAttributes, +) -> Result { + // If user-supplied bounds exist, apply them to their matching type parameters. + if let Some(config_bounds) = &container_attrs.bounds { + let mut config_bounds_applied = 0; + for param in generics.params.iter_mut() { + if let GenericParam::Type(type_param) = param { + if let Some(replacement) = config_bounds + .iter() + .flatten() + .find(|p| p.ident == type_param.ident) + { + *type_param = replacement.clone(); + config_bounds_applied += 1; + } else { + // If no user-supplied bounds exist for this type, delete the original bounds. + // This mimics serde. + type_param.bounds = Default::default(); + type_param.default = None; + } + } + } + let config_bounds_supplied = config_bounds + .iter() + .map(|bounds| bounds.len()) + .sum::(); + if config_bounds_applied != config_bounds_supplied { + return Err(Error::new( + Span::call_site(), + format!( + "invalid `{}` attribute. too many bounds, only {} out of {} are applicable", + ARBITRARY_ATTRIBUTE_NAME, config_bounds_applied, config_bounds_supplied, + ), + )); + } + Ok(generics) + } else { + // Otherwise, inject a `T: Arbitrary` bound for every parameter. + Ok(add_trait_bounds(generics, lifetime)) + } +} + +// Add a bound `T: Arbitrary` to every type parameter T. +fn add_trait_bounds(mut generics: Generics, lifetime: LifetimeParam) -> Generics { + for param in generics.params.iter_mut() { + if let GenericParam::Type(type_param) = param { + type_param + .bounds + .push(parse_quote!(arbitrary::Arbitrary<#lifetime>)); + } + } + generics +} + +fn with_recursive_count_guard( + recursive_count: &syn::Ident, + expr: impl quote::ToTokens, +) -> impl quote::ToTokens { + quote! { + let guard_against_recursion = u.is_empty(); + if guard_against_recursion { + #recursive_count.with(|count| { + if count.get() > 0 { + return Err(arbitrary::Error::NotEnoughData); + } + count.set(count.get() + 1); + Ok(()) + })?; + } + + let result = (|| { #expr })(); + + if guard_against_recursion { + #recursive_count.with(|count| { + count.set(count.get() - 1); + }); + } + + result + } +} + +fn gen_arbitrary_method( + input: &DeriveInput, + lifetime: LifetimeParam, + recursive_count: &syn::Ident, +) -> Result { + fn arbitrary_structlike( + fields: &Fields, + ident: &syn::Ident, + lifetime: LifetimeParam, + recursive_count: &syn::Ident, + ) -> Result { + let arbitrary = construct(fields, |_idx, field| gen_constructor_for_field(field))?; + let body = with_recursive_count_guard(recursive_count, quote! { Ok(#ident #arbitrary) }); + + let arbitrary_take_rest = construct_take_rest(fields)?; + let take_rest_body = + with_recursive_count_guard(recursive_count, quote! { Ok(#ident #arbitrary_take_rest) }); + + Ok(quote! { + fn arbitrary(u: &mut arbitrary::Unstructured<#lifetime>) -> arbitrary::Result { + #body + } + + fn arbitrary_take_rest(mut u: arbitrary::Unstructured<#lifetime>) -> arbitrary::Result { + #take_rest_body + } + }) + } + + let ident = &input.ident; + let output = match &input.data { + Data::Struct(data) => arbitrary_structlike(&data.fields, ident, lifetime, recursive_count)?, + Data::Union(data) => arbitrary_structlike( + &Fields::Named(data.fields.clone()), + ident, + lifetime, + recursive_count, + )?, + Data::Enum(data) => { + let variants: Vec = data + .variants + .iter() + .enumerate() + .map(|(i, variant)| { + let idx = i as u64; + let variant_name = &variant.ident; + construct(&variant.fields, |_, field| gen_constructor_for_field(field)) + .map(|ctor| quote! { #idx => #ident::#variant_name #ctor }) + }) + .collect::>()?; + + let variants_take_rest: Vec = data + .variants + .iter() + .enumerate() + .map(|(i, variant)| { + let idx = i as u64; + let variant_name = &variant.ident; + construct_take_rest(&variant.fields) + .map(|ctor| quote! { #idx => #ident::#variant_name #ctor }) + }) + .collect::>()?; + + let count = data.variants.len() as u64; + + let arbitrary = with_recursive_count_guard( + recursive_count, + quote! { + // Use a multiply + shift to generate a ranged random number + // with slight bias. For details, see: + // https://lemire.me/blog/2016/06/30/fast-random-shuffling + Ok(match (u64::from(::arbitrary(u)?) * #count) >> 32 { + #(#variants,)* + _ => unreachable!() + }) + }, + ); + + let arbitrary_take_rest = with_recursive_count_guard( + recursive_count, + quote! { + // Use a multiply + shift to generate a ranged random number + // with slight bias. For details, see: + // https://lemire.me/blog/2016/06/30/fast-random-shuffling + Ok(match (u64::from(::arbitrary(&mut u)?) * #count) >> 32 { + #(#variants_take_rest,)* + _ => unreachable!() + }) + }, + ); + + quote! { + fn arbitrary(u: &mut arbitrary::Unstructured<#lifetime>) -> arbitrary::Result { + #arbitrary + } + + fn arbitrary_take_rest(mut u: arbitrary::Unstructured<#lifetime>) -> arbitrary::Result { + #arbitrary_take_rest + } + } + } + }; + Ok(output) +} + +fn construct( + fields: &Fields, + ctor: impl Fn(usize, &Field) -> Result, +) -> Result { + let output = match fields { + Fields::Named(names) => { + let names: Vec = names + .named + .iter() + .enumerate() + .map(|(i, f)| { + let name = f.ident.as_ref().unwrap(); + ctor(i, f).map(|ctor| quote! { #name: #ctor }) + }) + .collect::>()?; + quote! { { #(#names,)* } } + } + Fields::Unnamed(names) => { + let names: Vec = names + .unnamed + .iter() + .enumerate() + .map(|(i, f)| ctor(i, f).map(|ctor| quote! { #ctor })) + .collect::>()?; + quote! { ( #(#names),* ) } + } + Fields::Unit => quote!(), + }; + Ok(output) +} + +fn construct_take_rest(fields: &Fields) -> Result { + construct(fields, |idx, field| { + determine_field_constructor(field).map(|field_constructor| match field_constructor { + FieldConstructor::Default => quote!(Default::default()), + FieldConstructor::Arbitrary => { + if idx + 1 == fields.len() { + quote! { arbitrary::Arbitrary::arbitrary_take_rest(u)? } + } else { + quote! { arbitrary::Arbitrary::arbitrary(&mut u)? } + } + } + FieldConstructor::With(function_or_closure) => quote!((#function_or_closure)(&mut u)?), + FieldConstructor::Value(value) => quote!(#value), + }) + }) +} + +fn gen_size_hint_method(input: &DeriveInput) -> Result { + let size_hint_fields = |fields: &Fields| { + fields + .iter() + .map(|f| { + let ty = &f.ty; + determine_field_constructor(f).map(|field_constructor| { + match field_constructor { + FieldConstructor::Default | FieldConstructor::Value(_) => { + quote!((0, Some(0))) + } + FieldConstructor::Arbitrary => { + quote! { <#ty as arbitrary::Arbitrary>::size_hint(depth) } + } + + // Note that in this case it's hard to determine what size_hint must be, so size_of::() is + // just an educated guess, although it's gonna be inaccurate for dynamically + // allocated types (Vec, HashMap, etc.). + FieldConstructor::With(_) => { + quote! { (::core::mem::size_of::<#ty>(), None) } + } + } + }) + }) + .collect::>>() + .map(|hints| { + quote! { + arbitrary::size_hint::and_all(&[ + #( #hints ),* + ]) + } + }) + }; + let size_hint_structlike = |fields: &Fields| { + size_hint_fields(fields).map(|hint| { + quote! { + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + arbitrary::size_hint::recursion_guard(depth, |depth| #hint) + } + } + }) + }; + match &input.data { + Data::Struct(data) => size_hint_structlike(&data.fields), + Data::Union(data) => size_hint_structlike(&Fields::Named(data.fields.clone())), + Data::Enum(data) => data + .variants + .iter() + .map(|v| size_hint_fields(&v.fields)) + .collect::>>() + .map(|variants| { + quote! { + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + arbitrary::size_hint::and( + ::size_hint(depth), + arbitrary::size_hint::recursion_guard(depth, |depth| { + arbitrary::size_hint::or_all(&[ #( #variants ),* ]) + }), + ) + } + } + }), + } +} + +fn gen_constructor_for_field(field: &Field) -> Result { + let ctor = match determine_field_constructor(field)? { + FieldConstructor::Default => quote!(Default::default()), + FieldConstructor::Arbitrary => quote!(arbitrary::Arbitrary::arbitrary(u)?), + FieldConstructor::With(function_or_closure) => quote!((#function_or_closure)(u)?), + FieldConstructor::Value(value) => quote!(#value), + }; + Ok(ctor) +} diff --git a/examples/derive_enum.rs b/examples/derive_enum.rs new file mode 100644 index 0000000000000000000000000000000000000000..c4fc9c958836f2c1d1ed947f0c15ef69631fe7f9 --- /dev/null +++ b/examples/derive_enum.rs @@ -0,0 +1,27 @@ +//! A simple example of deriving the `Arbitrary` trait for an `enum`. +//! +//! Note that this requires enabling the "derive" cargo feature. + +// Various enums/fields that we are deriving `Arbitrary` for aren't actually +// used except to show off the derive. +#![allow(dead_code)] + +use arbitrary::{Arbitrary, Unstructured}; + +#[derive(Arbitrary, Debug)] +enum MyEnum { + UnitVariant, + TupleVariant(bool, u32), + StructVariant { x: i8, y: (u8, i32) }, +} + +fn main() { + let raw = b"This is some raw, unstructured data!"; + + let mut unstructured = Unstructured::new(raw); + + let instance = MyEnum::arbitrary(&mut unstructured) + .expect("`unstructured` has enough underlying data to create all variants of `MyEnum`"); + + println!("Here is an arbitrary enum: {:?}", instance); +} diff --git a/fuzz/.gitignore b/fuzz/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..a0925114d619b8cee7d72961b052ddcc42023a24 --- /dev/null +++ b/fuzz/.gitignore @@ -0,0 +1,3 @@ +target +corpus +artifacts diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml new file mode 100644 index 0000000000000000000000000000000000000000..91893b991ddf7195a0a0e5af96fc64e9e2deae93 --- /dev/null +++ b/fuzz/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "arbitrary-fuzz" +version = "0.0.0" +authors = ["Automatically generated"] +publish = false +edition = "2021" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" + +[dependencies.arbitrary] +path = ".." + +[[bin]] +name = "int_in_range" +path = "fuzz_targets/int_in_range.rs" +test = false +doc = false diff --git a/fuzz/fuzz_targets/int_in_range.rs b/fuzz/fuzz_targets/int_in_range.rs new file mode 100644 index 0000000000000000000000000000000000000000..14c07f04d3bcefd95cfdff4d19e4bf79722abad8 --- /dev/null +++ b/fuzz/fuzz_targets/int_in_range.rs @@ -0,0 +1,49 @@ +#![no_main] + +use arbitrary::{unstructured::Int, Arbitrary, Result, Unstructured}; +use libfuzzer_sys::fuzz_target; +use std::{fmt::Display, ops::RangeInclusive}; + +fuzz_target!(|data: &[u8]| { + fuzz(data).expect("`int_in_range` should never return an error"); +}); + +fn fuzz(data: &[u8]) -> Result<()> { + let mut u = Unstructured::new(data); + + let choices = [ + assert_in_range::, + assert_in_range::, + assert_in_range::, + assert_in_range::, + assert_in_range::, + assert_in_range::, + assert_in_range::, + assert_in_range::, + assert_in_range::, + assert_in_range::, + assert_in_range::, + assert_in_range::, + ]; + + let f = u.choose(&choices[..])?; + f(&mut u) +} + +fn assert_in_range<'a, 'b, T>(u: &'a mut Unstructured<'b>) -> Result<()> +where + T: Arbitrary<'b> + Int + Display, +{ + let range = RangeInclusive::::arbitrary(u)?; + let start = *range.start(); + let end = *range.end(); + + let x = u.int_in_range(range)?; + + if start <= x && x <= end { + return Ok(()); + } + + let ty = std::any::type_name::(); + panic!("{ty}: {x} is not within {start}..={end}",); +} diff --git a/publish.sh b/publish.sh new file mode 100644 index 0000000000000000000000000000000000000000..24db2bd779297e12fc938b47d624e3a9dcf6eb9a --- /dev/null +++ b/publish.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +set -eux + +cd $(dirname $0)/derive + +cargo publish + +cd .. + +# Let the crates.io index figure out we've published `derive_arbitrary` already. +sleep 5 + +cargo publish diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000000000000000000000000000000000000..6ca8f190cf1ed642b9d25dae4eea17a5f0eb98e1 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,53 @@ +use std::{error, fmt}; + +/// An enumeration of buffer creation errors +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[non_exhaustive] +pub enum Error { + /// No choices were provided to the Unstructured::choose call + EmptyChoose, + /// There was not enough underlying data to fulfill some request for raw + /// bytes. + NotEnoughData, + /// The input bytes were not of the right format + IncorrectFormat, +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Error::EmptyChoose => write!( + f, + "`arbitrary::Unstructured::choose` must be given a non-empty set of choices" + ), + Error::NotEnoughData => write!( + f, + "There is not enough underlying raw data to construct an `Arbitrary` instance" + ), + Error::IncorrectFormat => write!( + f, + "The raw data is not of the correct format to construct this type" + ), + } + } +} + +impl error::Error for Error {} + +/// A `Result` with the error type fixed as `arbitrary::Error`. +/// +/// Either an `Ok(T)` or `Err(arbitrary::Error)`. +pub type Result = std::result::Result; + +#[cfg(test)] +mod tests { + // Often people will import our custom `Result` type because 99.9% of + // results in a file will be `arbitrary::Result` but then have that one last + // 0.1% that want to have a custom error type. Don't make them prefix that + // 0.1% as `std::result::Result`; instead, let `arbitrary::Result` have an + // overridable error type. + #[test] + fn can_use_custom_error_types_with_result() -> super::Result<(), String> { + Ok(()) + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000000000000000000000000000000000000..ba2165c4e4d8fc32d03e375596a132ef12f0fb07 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,1422 @@ +// Copyright © 2019 The Rust Fuzz Project Developers. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! The `Arbitrary` trait crate. +//! +//! This trait provides an [`Arbitrary`](./trait.Arbitrary.html) trait to +//! produce well-typed, structured values, from raw, byte buffers. It is +//! generally intended to be used with fuzzers like AFL or libFuzzer. See the +//! [`Arbitrary`](./trait.Arbitrary.html) trait's documentation for details on +//! automatically deriving, implementing, and/or using the trait. + +#![deny(bad_style)] +#![deny(missing_docs)] +#![deny(future_incompatible)] +#![deny(nonstandard_style)] +#![deny(rust_2018_compatibility)] +#![deny(rust_2018_idioms)] +#![deny(unused)] + +#[cfg(feature = "derive_arbitrary")] +pub use derive_arbitrary::*; + +mod error; +pub use error::*; + +pub mod unstructured; +#[doc(inline)] +pub use unstructured::Unstructured; + +pub mod size_hint; + +use core::array; +use core::cell::{Cell, RefCell, UnsafeCell}; +use core::iter; +use core::mem; +use core::num::{NonZeroI128, NonZeroI16, NonZeroI32, NonZeroI64, NonZeroI8, NonZeroIsize}; +use core::num::{NonZeroU128, NonZeroU16, NonZeroU32, NonZeroU64, NonZeroU8, NonZeroUsize}; +use core::ops::{Range, RangeBounds, RangeFrom, RangeInclusive, RangeTo, RangeToInclusive}; +use core::str; +use core::time::Duration; +use std::borrow::{Cow, ToOwned}; +use std::collections::{BTreeMap, BTreeSet, BinaryHeap, HashMap, HashSet, LinkedList, VecDeque}; +use std::ffi::{CString, OsString}; +use std::hash::BuildHasher; +use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; +use std::ops::Bound; +use std::path::PathBuf; +use std::rc::Rc; +use std::sync::atomic::{AtomicBool, AtomicIsize, AtomicUsize}; +use std::sync::{Arc, Mutex}; + +/// Generate arbitrary structured values from raw, unstructured data. +/// +/// The `Arbitrary` trait allows you to generate valid structured values, like +/// `HashMap`s, or ASTs, or `MyTomlConfig`, or any other data structure from +/// raw, unstructured bytes provided by a fuzzer. +/// +/// # Deriving `Arbitrary` +/// +/// Automatically deriving the `Arbitrary` trait is the recommended way to +/// implement `Arbitrary` for your types. +/// +/// Using the custom derive requires that you enable the `"derive"` cargo +/// feature in your `Cargo.toml`: +/// +/// ```toml +/// [dependencies] +/// arbitrary = { version = "1", features = ["derive"] } +/// ``` +/// +/// Then, you add the `#[derive(Arbitrary)]` annotation to your `struct` or +/// `enum` type definition: +/// +/// ``` +/// # #[cfg(feature = "derive")] mod foo { +/// use arbitrary::Arbitrary; +/// use std::collections::HashSet; +/// +/// #[derive(Arbitrary)] +/// pub struct AddressBook { +/// friends: HashSet, +/// } +/// +/// #[derive(Arbitrary, Hash, Eq, PartialEq)] +/// pub enum Friend { +/// Buddy { name: String }, +/// Pal { age: usize }, +/// } +/// # } +/// ``` +/// +/// Every member of the `struct` or `enum` must also implement `Arbitrary`. +/// +/// # Implementing `Arbitrary` By Hand +/// +/// Implementing `Arbitrary` mostly involves nested calls to other `Arbitrary` +/// arbitrary implementations for each of your `struct` or `enum`'s members. But +/// sometimes you need some amount of raw data, or you need to generate a +/// variably-sized collection type, or something of that sort. The +/// [`Unstructured`][crate::Unstructured] type helps you with these tasks. +/// +/// ``` +/// # #[cfg(feature = "derive")] mod foo { +/// # pub struct MyCollection { _t: std::marker::PhantomData } +/// # impl MyCollection { +/// # pub fn new() -> Self { MyCollection { _t: std::marker::PhantomData } } +/// # pub fn insert(&mut self, element: T) {} +/// # } +/// use arbitrary::{Arbitrary, Result, Unstructured}; +/// +/// impl<'a, T> Arbitrary<'a> for MyCollection +/// where +/// T: Arbitrary<'a>, +/// { +/// fn arbitrary(u: &mut Unstructured<'a>) -> Result { +/// // Get an iterator of arbitrary `T`s. +/// let iter = u.arbitrary_iter::()?; +/// +/// // And then create a collection! +/// let mut my_collection = MyCollection::new(); +/// for elem_result in iter { +/// let elem = elem_result?; +/// my_collection.insert(elem); +/// } +/// +/// Ok(my_collection) +/// } +/// } +/// # } +/// ``` +pub trait Arbitrary<'a>: Sized { + /// Generate an arbitrary value of `Self` from the given unstructured data. + /// + /// Calling `Arbitrary::arbitrary` requires that you have some raw data, + /// perhaps given to you by a fuzzer like AFL or libFuzzer. You wrap this + /// raw data in an `Unstructured`, and then you can call `::arbitrary` to construct an arbitrary instance of `MyType` + /// from that unstructured data. + /// + /// Implementations may return an error if there is not enough data to + /// construct a full instance of `Self`, or they may fill out the rest of + /// `Self` with dummy values. Using dummy values when the underlying data is + /// exhausted can help avoid accidentally "defeating" some of the fuzzer's + /// mutations to the underlying byte stream that might otherwise lead to + /// interesting runtime behavior or new code coverage if only we had just a + /// few more bytes. However, it also requires that implementations for + /// recursive types (e.g. `struct Foo(Option>)`) avoid infinite + /// recursion when the underlying data is exhausted. + /// + /// ``` + /// # #[cfg(feature = "derive")] fn foo() { + /// use arbitrary::{Arbitrary, Unstructured}; + /// + /// #[derive(Arbitrary)] + /// pub struct MyType { + /// // ... + /// } + /// + /// // Get the raw data from the fuzzer or wherever else. + /// # let get_raw_data_from_fuzzer = || &[]; + /// let raw_data: &[u8] = get_raw_data_from_fuzzer(); + /// + /// // Wrap that raw data in an `Unstructured`. + /// let mut unstructured = Unstructured::new(raw_data); + /// + /// // Generate an arbitrary instance of `MyType` and do stuff with it. + /// if let Ok(value) = MyType::arbitrary(&mut unstructured) { + /// # let do_stuff = |_| {}; + /// do_stuff(value); + /// } + /// # } + /// ``` + /// + /// See also the documentation for [`Unstructured`][crate::Unstructured]. + fn arbitrary(u: &mut Unstructured<'a>) -> Result; + + /// Generate an arbitrary value of `Self` from the entirety of the given + /// unstructured data. + /// + /// This is similar to Arbitrary::arbitrary, however it assumes that it is + /// the last consumer of the given data, and is thus able to consume it all + /// if it needs. See also the documentation for + /// [`Unstructured`][crate::Unstructured]. + fn arbitrary_take_rest(mut u: Unstructured<'a>) -> Result { + Self::arbitrary(&mut u) + } + + /// Get a size hint for how many bytes out of an `Unstructured` this type + /// needs to construct itself. + /// + /// This is useful for determining how many elements we should insert when + /// creating an arbitrary collection. + /// + /// The return value is similar to + /// [`Iterator::size_hint`][iterator-size-hint]: it returns a tuple where + /// the first element is a lower bound on the number of bytes required, and + /// the second element is an optional upper bound. + /// + /// The default implementation return `(0, None)` which is correct for any + /// type, but not ultimately that useful. Using `#[derive(Arbitrary)]` will + /// create a better implementation. If you are writing an `Arbitrary` + /// implementation by hand, and your type can be part of a dynamically sized + /// collection (such as `Vec`), you are strongly encouraged to override this + /// default with a better implementation. The + /// [`size_hint`][crate::size_hint] module will help with this task. + /// + /// ## Invariant + /// + /// It must be possible to construct every possible output using only inputs + /// of lengths bounded by these parameters. This applies to both + /// [`Arbitrary::arbitrary`] and [`Arbitrary::arbitrary_take_rest`]. + /// + /// This is trivially true for `(0, None)`. To restrict this further, it + /// must be proven that all inputs that are now excluded produced redundant + /// outputs which are still possible to produce using the reduced input + /// space. + /// + /// ## The `depth` Parameter + /// + /// If you 100% know that the type you are implementing `Arbitrary` for is + /// not a recursive type, or your implementation is not transitively calling + /// any other `size_hint` methods, you can ignore the `depth` parameter. + /// Note that if you are implementing `Arbitrary` for a generic type, you + /// cannot guarantee the lack of type recursion! + /// + /// Otherwise, you need to use + /// [`arbitrary::size_hint::recursion_guard(depth)`][crate::size_hint::recursion_guard] + /// to prevent potential infinite recursion when calculating size hints for + /// potentially recursive types: + /// + /// ``` + /// use arbitrary::{Arbitrary, Unstructured, size_hint}; + /// + /// // This can potentially be a recursive type if `L` or `R` contain + /// // something like `Box>>`! + /// enum MyEither { + /// Left(L), + /// Right(R), + /// } + /// + /// impl<'a, L, R> Arbitrary<'a> for MyEither + /// where + /// L: Arbitrary<'a>, + /// R: Arbitrary<'a>, + /// { + /// fn arbitrary(u: &mut Unstructured) -> arbitrary::Result { + /// // ... + /// # unimplemented!() + /// } + /// + /// fn size_hint(depth: usize) -> (usize, Option) { + /// // Protect against potential infinite recursion with + /// // `recursion_guard`. + /// size_hint::recursion_guard(depth, |depth| { + /// // If we aren't too deep, then `recursion_guard` calls + /// // this closure, which implements the natural size hint. + /// // Don't forget to use the new `depth` in all nested + /// // `size_hint` calls! We recommend shadowing the + /// // parameter, like what is done here, so that you can't + /// // accidentally use the wrong depth. + /// size_hint::or( + /// ::size_hint(depth), + /// ::size_hint(depth), + /// ) + /// }) + /// } + /// } + /// ``` + /// + /// [iterator-size-hint]: https://doc.rust-lang.org/stable/std/iter/trait.Iterator.html#method.size_hint + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + let _ = depth; + (0, None) + } +} + +impl<'a> Arbitrary<'a> for () { + fn arbitrary(_: &mut Unstructured<'a>) -> Result { + Ok(()) + } + + #[inline] + fn size_hint(_depth: usize) -> (usize, Option) { + (0, Some(0)) + } +} + +impl<'a> Arbitrary<'a> for bool { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + Ok(>::arbitrary(u)? & 1 == 1) + } + + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + >::size_hint(depth) + } +} + +macro_rules! impl_arbitrary_for_integers { + ( $( $ty:ty: $unsigned:ty; )* ) => { + $( + impl<'a> Arbitrary<'a> for $ty { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + let mut buf = [0; mem::size_of::<$ty>()]; + u.fill_buffer(&mut buf)?; + let mut x: $unsigned = 0; + for i in 0..mem::size_of::<$ty>() { + x |= buf[i] as $unsigned << (i * 8); + } + Ok(x as $ty) + } + + #[inline] + fn size_hint(_depth: usize) -> (usize, Option) { + let n = mem::size_of::<$ty>(); + (n, Some(n)) + } + + } + )* + } +} + +impl_arbitrary_for_integers! { + u8: u8; + u16: u16; + u32: u32; + u64: u64; + u128: u128; + usize: usize; + i8: u8; + i16: u16; + i32: u32; + i64: u64; + i128: u128; + isize: usize; +} + +macro_rules! impl_arbitrary_for_floats { + ( $( $ty:ident : $unsigned:ty; )* ) => { + $( + impl<'a> Arbitrary<'a> for $ty { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + Ok(Self::from_bits(<$unsigned as Arbitrary<'a>>::arbitrary(u)?)) + } + + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + <$unsigned as Arbitrary<'a>>::size_hint(depth) + } + } + )* + } +} + +impl_arbitrary_for_floats! { + f32: u32; + f64: u64; +} + +impl<'a> Arbitrary<'a> for char { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + use std::char; + // The highest unicode code point is 0x11_FFFF + const CHAR_END: u32 = 0x11_0000; + // The size of the surrogate blocks + const SURROGATES_START: u32 = 0xD800; + let mut c = >::arbitrary(u)? % CHAR_END; + if let Some(c) = char::from_u32(c) { + Ok(c) + } else { + // We found a surrogate, wrap and try again + c -= SURROGATES_START; + Ok(char::from_u32(c) + .expect("Generated character should be valid! This is a bug in arbitrary-rs")) + } + } + + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + >::size_hint(depth) + } +} + +impl<'a> Arbitrary<'a> for AtomicBool { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + Arbitrary::arbitrary(u).map(Self::new) + } + + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + >::size_hint(depth) + } +} + +impl<'a> Arbitrary<'a> for AtomicIsize { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + Arbitrary::arbitrary(u).map(Self::new) + } + + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + >::size_hint(depth) + } +} + +impl<'a> Arbitrary<'a> for AtomicUsize { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + Arbitrary::arbitrary(u).map(Self::new) + } + + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + >::size_hint(depth) + } +} + +macro_rules! impl_range { + ( + $range:ty, + $value_closure:expr, + $value_ty:ty, + $fun:ident($fun_closure:expr), + $size_hint_closure:expr + ) => { + impl<'a, A> Arbitrary<'a> for $range + where + A: Arbitrary<'a> + Clone + PartialOrd, + { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + let value: $value_ty = Arbitrary::arbitrary(u)?; + Ok($fun(value, $fun_closure)) + } + + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + $size_hint_closure(depth) + } + } + }; +} + +impl_range!( + Range, + |r: &Range| (r.start.clone(), r.end.clone()), + (A, A), + bounded_range(|(a, b)| a..b), + |depth| crate::size_hint::and( + ::size_hint(depth), + ::size_hint(depth) + ) +); +impl_range!( + RangeFrom, + |r: &RangeFrom| r.start.clone(), + A, + unbounded_range(|a| a..), + |depth| ::size_hint(depth) +); +impl_range!( + RangeInclusive, + |r: &RangeInclusive| (r.start().clone(), r.end().clone()), + (A, A), + bounded_range(|(a, b)| a..=b), + |depth| crate::size_hint::and( + ::size_hint(depth), + ::size_hint(depth) + ) +); +impl_range!( + RangeTo, + |r: &RangeTo| r.end.clone(), + A, + unbounded_range(|b| ..b), + |depth| ::size_hint(depth) +); +impl_range!( + RangeToInclusive, + |r: &RangeToInclusive| r.end.clone(), + A, + unbounded_range(|b| ..=b), + |depth| ::size_hint(depth) +); + +pub(crate) fn bounded_range(bounds: (I, I), cb: CB) -> R +where + CB: Fn((I, I)) -> R, + I: PartialOrd, + R: RangeBounds, +{ + let (mut start, mut end) = bounds; + if start > end { + mem::swap(&mut start, &mut end); + } + cb((start, end)) +} + +pub(crate) fn unbounded_range(bound: I, cb: CB) -> R +where + CB: Fn(I) -> R, + R: RangeBounds, +{ + cb(bound) +} + +impl<'a> Arbitrary<'a> for Duration { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + Ok(Self::new( + ::arbitrary(u)?, + u.int_in_range(0..=999_999_999)?, + )) + } + + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + crate::size_hint::and( + ::size_hint(depth), + ::size_hint(depth), + ) + } +} + +impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for Option { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + Ok(if >::arbitrary(u)? { + Some(Arbitrary::arbitrary(u)?) + } else { + None + }) + } + + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + crate::size_hint::and( + ::size_hint(depth), + crate::size_hint::or((0, Some(0)), ::size_hint(depth)), + ) + } +} + +impl<'a, A: Arbitrary<'a>, B: Arbitrary<'a>> Arbitrary<'a> for std::result::Result { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + Ok(if >::arbitrary(u)? { + Ok(::arbitrary(u)?) + } else { + Err(::arbitrary(u)?) + }) + } + + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + crate::size_hint::and( + ::size_hint(depth), + crate::size_hint::or( + ::size_hint(depth), + ::size_hint(depth), + ), + ) + } +} + +macro_rules! arbitrary_tuple { + () => {}; + ($last: ident $($xs: ident)*) => { + arbitrary_tuple!($($xs)*); + + impl<'a, $($xs: Arbitrary<'a>,)* $last: Arbitrary<'a>> Arbitrary<'a> for ($($xs,)* $last,) { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + Ok(($($xs::arbitrary(u)?,)* Arbitrary::arbitrary(u)?,)) + } + + #[allow(unused_mut, non_snake_case)] + fn arbitrary_take_rest(mut u: Unstructured<'a>) -> Result { + $(let $xs = $xs::arbitrary(&mut u)?;)* + let $last = $last::arbitrary_take_rest(u)?; + Ok(($($xs,)* $last,)) + } + + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + crate::size_hint::and_all(&[ + <$last as Arbitrary>::size_hint(depth), + $( <$xs as Arbitrary>::size_hint(depth) ),* + ]) + } + } + }; +} +arbitrary_tuple!(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z); + +// Helper to safely create arrays since the standard library doesn't +// provide one yet. Shouldn't be necessary in the future. +struct ArrayGuard { + dst: *mut T, + initialized: usize, +} + +impl Drop for ArrayGuard { + fn drop(&mut self) { + debug_assert!(self.initialized <= N); + let initialized_part = core::ptr::slice_from_raw_parts_mut(self.dst, self.initialized); + unsafe { + core::ptr::drop_in_place(initialized_part); + } + } +} + +fn try_create_array(mut cb: F) -> Result<[T; N]> +where + F: FnMut(usize) -> Result, +{ + let mut array: mem::MaybeUninit<[T; N]> = mem::MaybeUninit::uninit(); + let array_ptr = array.as_mut_ptr(); + let dst = array_ptr as _; + let mut guard: ArrayGuard = ArrayGuard { + dst, + initialized: 0, + }; + unsafe { + for (idx, value_ptr) in (*array.as_mut_ptr()).iter_mut().enumerate() { + core::ptr::write(value_ptr, cb(idx)?); + guard.initialized += 1; + } + mem::forget(guard); + Ok(array.assume_init()) + } +} + +impl<'a, T, const N: usize> Arbitrary<'a> for [T; N] +where + T: Arbitrary<'a>, +{ + #[inline] + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + try_create_array(|_| >::arbitrary(u)) + } + + #[inline] + fn arbitrary_take_rest(mut u: Unstructured<'a>) -> Result { + let mut array = Self::arbitrary(&mut u)?; + if let Some(last) = array.last_mut() { + *last = Arbitrary::arbitrary_take_rest(u)?; + } + Ok(array) + } + + #[inline] + fn size_hint(d: usize) -> (usize, Option) { + crate::size_hint::and_all(&array::from_fn::<_, N, _>(|_| { + ::size_hint(d) + })) + } +} + +impl<'a> Arbitrary<'a> for &'a [u8] { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + let len = u.arbitrary_len::()?; + u.bytes(len) + } + + fn arbitrary_take_rest(u: Unstructured<'a>) -> Result { + Ok(u.take_rest()) + } + + #[inline] + fn size_hint(_depth: usize) -> (usize, Option) { + (0, None) + } +} + +impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for Vec { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + u.arbitrary_iter()?.collect() + } + + fn arbitrary_take_rest(u: Unstructured<'a>) -> Result { + u.arbitrary_take_rest_iter()?.collect() + } + + #[inline] + fn size_hint(_depth: usize) -> (usize, Option) { + (0, None) + } +} + +impl<'a, K: Arbitrary<'a> + Ord, V: Arbitrary<'a>> Arbitrary<'a> for BTreeMap { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + u.arbitrary_iter()?.collect() + } + + fn arbitrary_take_rest(u: Unstructured<'a>) -> Result { + u.arbitrary_take_rest_iter()?.collect() + } + + #[inline] + fn size_hint(_depth: usize) -> (usize, Option) { + (0, None) + } +} + +impl<'a, A: Arbitrary<'a> + Ord> Arbitrary<'a> for BTreeSet { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + u.arbitrary_iter()?.collect() + } + + fn arbitrary_take_rest(u: Unstructured<'a>) -> Result { + u.arbitrary_take_rest_iter()?.collect() + } + + #[inline] + fn size_hint(_depth: usize) -> (usize, Option) { + (0, None) + } +} + +impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for Bound { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + match u.int_in_range::(0..=2)? { + 0 => Ok(Bound::Included(A::arbitrary(u)?)), + 1 => Ok(Bound::Excluded(A::arbitrary(u)?)), + 2 => Ok(Bound::Unbounded), + _ => unreachable!(), + } + } + + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + size_hint::or( + size_hint::and((1, Some(1)), A::size_hint(depth)), + (1, Some(1)), + ) + } +} + +impl<'a, A: Arbitrary<'a> + Ord> Arbitrary<'a> for BinaryHeap { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + u.arbitrary_iter()?.collect() + } + + fn arbitrary_take_rest(u: Unstructured<'a>) -> Result { + u.arbitrary_take_rest_iter()?.collect() + } + + #[inline] + fn size_hint(_depth: usize) -> (usize, Option) { + (0, None) + } +} + +impl<'a, K: Arbitrary<'a> + Eq + ::std::hash::Hash, V: Arbitrary<'a>, S: BuildHasher + Default> + Arbitrary<'a> for HashMap +{ + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + u.arbitrary_iter()?.collect() + } + + fn arbitrary_take_rest(u: Unstructured<'a>) -> Result { + u.arbitrary_take_rest_iter()?.collect() + } + + #[inline] + fn size_hint(_depth: usize) -> (usize, Option) { + (0, None) + } +} + +impl<'a, A: Arbitrary<'a> + Eq + ::std::hash::Hash, S: BuildHasher + Default> Arbitrary<'a> + for HashSet +{ + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + u.arbitrary_iter()?.collect() + } + + fn arbitrary_take_rest(u: Unstructured<'a>) -> Result { + u.arbitrary_take_rest_iter()?.collect() + } + + #[inline] + fn size_hint(_depth: usize) -> (usize, Option) { + (0, None) + } +} + +impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for LinkedList { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + u.arbitrary_iter()?.collect() + } + + fn arbitrary_take_rest(u: Unstructured<'a>) -> Result { + u.arbitrary_take_rest_iter()?.collect() + } + + #[inline] + fn size_hint(_depth: usize) -> (usize, Option) { + (0, None) + } +} + +impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for VecDeque { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + u.arbitrary_iter()?.collect() + } + + fn arbitrary_take_rest(u: Unstructured<'a>) -> Result { + u.arbitrary_take_rest_iter()?.collect() + } + + #[inline] + fn size_hint(_depth: usize) -> (usize, Option) { + (0, None) + } +} + +impl<'a, A> Arbitrary<'a> for Cow<'a, A> +where + A: ToOwned + ?Sized, + ::Owned: Arbitrary<'a>, +{ + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + Arbitrary::arbitrary(u).map(Cow::Owned) + } + + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + crate::size_hint::recursion_guard(depth, |depth| { + <::Owned as Arbitrary>::size_hint(depth) + }) + } +} + +fn arbitrary_str<'a>(u: &mut Unstructured<'a>, size: usize) -> Result<&'a str> { + match str::from_utf8(u.peek_bytes(size).unwrap()) { + Ok(s) => { + u.bytes(size).unwrap(); + Ok(s) + } + Err(e) => { + let i = e.valid_up_to(); + let valid = u.bytes(i).unwrap(); + let s = unsafe { + debug_assert!(str::from_utf8(valid).is_ok()); + str::from_utf8_unchecked(valid) + }; + Ok(s) + } + } +} + +impl<'a> Arbitrary<'a> for &'a str { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + let size = u.arbitrary_len::()?; + arbitrary_str(u, size) + } + + fn arbitrary_take_rest(mut u: Unstructured<'a>) -> Result { + let size = u.len(); + arbitrary_str(&mut u, size) + } + + #[inline] + fn size_hint(_depth: usize) -> (usize, Option) { + (0, None) + } +} + +impl<'a> Arbitrary<'a> for String { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + <&str as Arbitrary>::arbitrary(u).map(Into::into) + } + + fn arbitrary_take_rest(u: Unstructured<'a>) -> Result { + <&str as Arbitrary>::arbitrary_take_rest(u).map(Into::into) + } + + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + <&str as Arbitrary>::size_hint(depth) + } +} + +impl<'a> Arbitrary<'a> for CString { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + as Arbitrary>::arbitrary(u).map(|mut x| { + x.retain(|&c| c != 0); + Self::new(x).unwrap() + }) + } + + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + as Arbitrary>::size_hint(depth) + } +} + +impl<'a> Arbitrary<'a> for OsString { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + ::arbitrary(u).map(From::from) + } + + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + ::size_hint(depth) + } +} + +impl<'a> Arbitrary<'a> for PathBuf { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + ::arbitrary(u).map(From::from) + } + + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + ::size_hint(depth) + } +} + +impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for Box { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + Arbitrary::arbitrary(u).map(Self::new) + } + + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + crate::size_hint::recursion_guard(depth, ::size_hint) + } +} + +impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for Box<[A]> { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + as Arbitrary>::arbitrary(u).map(|x| x.into_boxed_slice()) + } + + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + as Arbitrary>::size_hint(depth) + } +} + +impl<'a> Arbitrary<'a> for Box { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + ::arbitrary(u).map(|x| x.into_boxed_str()) + } + + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + ::size_hint(depth) + } +} + +// impl Arbitrary for Box { +// fn arbitrary(u: &mut Unstructured<'_>) -> Result { +// ::arbitrary(u).map(|x| x.into_boxed_c_str()) +// } +// } + +// impl Arbitrary for Box { +// fn arbitrary(u: &mut Unstructured<'_>) -> Result { +// ::arbitrary(u).map(|x| x.into_boxed_osstr()) +// +// } +// } + +impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for Arc { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + Arbitrary::arbitrary(u).map(Self::new) + } + + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + crate::size_hint::recursion_guard(depth, ::size_hint) + } +} + +impl<'a> Arbitrary<'a> for Arc { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + <&str as Arbitrary>::arbitrary(u).map(Into::into) + } + + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + <&str as Arbitrary>::size_hint(depth) + } +} + +impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for Rc { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + Arbitrary::arbitrary(u).map(Self::new) + } + + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + crate::size_hint::recursion_guard(depth, ::size_hint) + } +} + +impl<'a> Arbitrary<'a> for Rc { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + <&str as Arbitrary>::arbitrary(u).map(Into::into) + } + + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + <&str as Arbitrary>::size_hint(depth) + } +} + +impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for Cell { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + Arbitrary::arbitrary(u).map(Self::new) + } + + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + >::size_hint(depth) + } +} + +impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for RefCell { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + Arbitrary::arbitrary(u).map(Self::new) + } + + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + >::size_hint(depth) + } +} + +impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for UnsafeCell { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + Arbitrary::arbitrary(u).map(Self::new) + } + + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + >::size_hint(depth) + } +} + +impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for Mutex { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + Arbitrary::arbitrary(u).map(Self::new) + } + + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + >::size_hint(depth) + } +} + +impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for iter::Empty { + fn arbitrary(_: &mut Unstructured<'a>) -> Result { + Ok(iter::empty()) + } + + #[inline] + fn size_hint(_depth: usize) -> (usize, Option) { + (0, Some(0)) + } +} + +impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for ::std::marker::PhantomData { + fn arbitrary(_: &mut Unstructured<'a>) -> Result { + Ok(::std::marker::PhantomData) + } + + #[inline] + fn size_hint(_depth: usize) -> (usize, Option) { + (0, Some(0)) + } +} + +impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for ::std::num::Wrapping { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + Arbitrary::arbitrary(u).map(::std::num::Wrapping) + } + + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + >::size_hint(depth) + } +} + +macro_rules! implement_nonzero_int { + ($nonzero:ty, $int:ty) => { + impl<'a> Arbitrary<'a> for $nonzero { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + match Self::new(<$int as Arbitrary<'a>>::arbitrary(u)?) { + Some(n) => Ok(n), + None => Err(Error::IncorrectFormat), + } + } + + #[inline] + fn size_hint(depth: usize) -> (usize, Option) { + <$int as Arbitrary<'a>>::size_hint(depth) + } + } + }; +} + +implement_nonzero_int! { NonZeroI8, i8 } +implement_nonzero_int! { NonZeroI16, i16 } +implement_nonzero_int! { NonZeroI32, i32 } +implement_nonzero_int! { NonZeroI64, i64 } +implement_nonzero_int! { NonZeroI128, i128 } +implement_nonzero_int! { NonZeroIsize, isize } +implement_nonzero_int! { NonZeroU8, u8 } +implement_nonzero_int! { NonZeroU16, u16 } +implement_nonzero_int! { NonZeroU32, u32 } +implement_nonzero_int! { NonZeroU64, u64 } +implement_nonzero_int! { NonZeroU128, u128 } +implement_nonzero_int! { NonZeroUsize, usize } + +impl<'a> Arbitrary<'a> for Ipv4Addr { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + Ok(Ipv4Addr::from(u32::arbitrary(u)?)) + } + + #[inline] + fn size_hint(_depth: usize) -> (usize, Option) { + (4, Some(4)) + } +} + +impl<'a> Arbitrary<'a> for Ipv6Addr { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + Ok(Ipv6Addr::from(u128::arbitrary(u)?)) + } + + #[inline] + fn size_hint(_depth: usize) -> (usize, Option) { + (16, Some(16)) + } +} + +impl<'a> Arbitrary<'a> for IpAddr { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + if u.arbitrary()? { + Ok(IpAddr::V4(u.arbitrary()?)) + } else { + Ok(IpAddr::V6(u.arbitrary()?)) + } + } + + fn size_hint(depth: usize) -> (usize, Option) { + size_hint::and( + bool::size_hint(depth), + size_hint::or(Ipv4Addr::size_hint(depth), Ipv6Addr::size_hint(depth)), + ) + } +} + +#[cfg(test)] +mod test { + use super::*; + + /// Generates an arbitrary `T`, and checks that the result is consistent with the + /// `size_hint()` reported by `T`. + fn checked_arbitrary<'a, T: Arbitrary<'a>>(u: &mut Unstructured<'a>) -> Result { + let (min, max) = T::size_hint(0); + + let len_before = u.len(); + let result = T::arbitrary(u); + + let consumed = len_before - u.len(); + + if let Some(max) = max { + assert!( + consumed <= max, + "incorrect maximum size: indicated {}, actually consumed {}", + max, + consumed + ); + } + + if result.is_ok() { + assert!( + consumed >= min, + "incorrect minimum size: indicated {}, actually consumed {}", + min, + consumed + ); + } + + result + } + + /// Like `checked_arbitrary()`, but calls `arbitrary_take_rest()` instead of `arbitrary()`. + fn checked_arbitrary_take_rest<'a, T: Arbitrary<'a>>(u: Unstructured<'a>) -> Result { + let (min, _) = T::size_hint(0); + + let len_before = u.len(); + let result = T::arbitrary_take_rest(u); + + if result.is_ok() { + assert!( + len_before >= min, + "incorrect minimum size: indicated {}, worked with {}", + min, + len_before + ); + } + + result + } + + #[test] + fn finite_buffer_fill_buffer() { + let x = [1, 2, 3, 4]; + let mut rb = Unstructured::new(&x); + let mut z = [0; 2]; + rb.fill_buffer(&mut z).unwrap(); + assert_eq!(z, [1, 2]); + rb.fill_buffer(&mut z).unwrap(); + assert_eq!(z, [3, 4]); + rb.fill_buffer(&mut z).unwrap(); + assert_eq!(z, [0, 0]); + } + + #[test] + fn arbitrary_for_integers() { + let x = [1, 2, 3, 4]; + let mut buf = Unstructured::new(&x); + let expected = 1 | (2 << 8) | (3 << 16) | (4 << 24); + let actual = checked_arbitrary::(&mut buf).unwrap(); + assert_eq!(expected, actual); + } + + #[test] + fn arbitrary_for_bytes() { + let x = [1, 2, 3, 4, 4]; + let mut buf = Unstructured::new(&x); + let expected = &[1, 2, 3, 4]; + let actual = checked_arbitrary::<&[u8]>(&mut buf).unwrap(); + assert_eq!(expected, actual); + } + + #[test] + fn arbitrary_take_rest_for_bytes() { + let x = [1, 2, 3, 4]; + let buf = Unstructured::new(&x); + let expected = &[1, 2, 3, 4]; + let actual = checked_arbitrary_take_rest::<&[u8]>(buf).unwrap(); + assert_eq!(expected, actual); + } + + #[test] + fn arbitrary_collection() { + let x = [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 8, 12, + ]; + assert_eq!( + checked_arbitrary::<&[u8]>(&mut Unstructured::new(&x)).unwrap(), + &[1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3] + ); + assert_eq!( + checked_arbitrary::>(&mut Unstructured::new(&x)).unwrap(), + &[2, 4, 6, 8, 1] + ); + assert_eq!( + checked_arbitrary::>(&mut Unstructured::new(&x)).unwrap(), + &[84148994] + ); + assert_eq!( + checked_arbitrary::(&mut Unstructured::new(&x)).unwrap(), + "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x01\x02\x03" + ); + } + + #[test] + fn arbitrary_take_rest() { + // Basic examples + let x = [1, 2, 3, 4]; + assert_eq!( + checked_arbitrary_take_rest::<&[u8]>(Unstructured::new(&x)).unwrap(), + &[1, 2, 3, 4] + ); + assert_eq!( + checked_arbitrary_take_rest::>(Unstructured::new(&x)).unwrap(), + &[1, 2, 3, 4] + ); + assert_eq!( + checked_arbitrary_take_rest::>(Unstructured::new(&x)).unwrap(), + &[0x4030201] + ); + assert_eq!( + checked_arbitrary_take_rest::(Unstructured::new(&x)).unwrap(), + "\x01\x02\x03\x04" + ); + + // Empty remainder + assert_eq!( + checked_arbitrary_take_rest::<&[u8]>(Unstructured::new(&[])).unwrap(), + &[] + ); + assert_eq!( + checked_arbitrary_take_rest::>(Unstructured::new(&[])).unwrap(), + &[] + ); + + // Cannot consume all but can consume part of the input + assert_eq!( + checked_arbitrary_take_rest::(Unstructured::new(&[1, 0xFF, 2])).unwrap(), + "\x01" + ); + } + + #[test] + fn size_hint_for_tuples() { + assert_eq!( + (7, Some(7)), + <(bool, u16, i32) as Arbitrary<'_>>::size_hint(0) + ); + assert_eq!((1, None), <(u8, Vec) as Arbitrary>::size_hint(0)); + } +} + +/// Multiple conflicting arbitrary attributes are used on the same field: +/// ```compile_fail +/// #[derive(::arbitrary::Arbitrary)] +/// struct Point { +/// #[arbitrary(value = 2)] +/// #[arbitrary(value = 2)] +/// x: i32, +/// } +/// ``` +/// +/// An unknown attribute: +/// ```compile_fail +/// #[derive(::arbitrary::Arbitrary)] +/// struct Point { +/// #[arbitrary(unknown_attr)] +/// x: i32, +/// } +/// ``` +/// +/// An unknown attribute with a value: +/// ```compile_fail +/// #[derive(::arbitrary::Arbitrary)] +/// struct Point { +/// #[arbitrary(unknown_attr = 13)] +/// x: i32, +/// } +/// ``` +/// +/// `value` without RHS: +/// ```compile_fail +/// #[derive(::arbitrary::Arbitrary)] +/// struct Point { +/// #[arbitrary(value)] +/// x: i32, +/// } +/// ``` +/// +/// `with` without RHS: +/// ```compile_fail +/// #[derive(::arbitrary::Arbitrary)] +/// struct Point { +/// #[arbitrary(with)] +/// x: i32, +/// } +/// ``` +/// +/// Multiple conflicting bounds at the container-level: +/// ```compile_fail +/// #[derive(::arbitrary::Arbitrary)] +/// #[arbitrary(bound = "T: Default")] +/// #[arbitrary(bound = "T: Default")] +/// struct Point { +/// #[arbitrary(default)] +/// x: T, +/// } +/// ``` +/// +/// Multiple conflicting bounds in a single bound attribute: +/// ```compile_fail +/// #[derive(::arbitrary::Arbitrary)] +/// #[arbitrary(bound = "T: Default, T: Default")] +/// struct Point { +/// #[arbitrary(default)] +/// x: T, +/// } +/// ``` +/// +/// Multiple conflicting bounds in multiple bound attributes: +/// ```compile_fail +/// #[derive(::arbitrary::Arbitrary)] +/// #[arbitrary(bound = "T: Default", bound = "T: Default")] +/// struct Point { +/// #[arbitrary(default)] +/// x: T, +/// } +/// ``` +/// +/// Too many bounds supplied: +/// ```compile_fail +/// #[derive(::arbitrary::Arbitrary)] +/// #[arbitrary(bound = "T: Default")] +/// struct Point { +/// x: i32, +/// } +/// ``` +/// +/// Too many bounds supplied across multiple attributes: +/// ```compile_fail +/// #[derive(::arbitrary::Arbitrary)] +/// #[arbitrary(bound = "T: Default")] +/// #[arbitrary(bound = "U: Default")] +/// struct Point { +/// #[arbitrary(default)] +/// x: T, +/// } +/// ``` +#[cfg(all(doctest, feature = "derive"))] +pub struct CompileFailTests; diff --git a/src/size_hint.rs b/src/size_hint.rs new file mode 100644 index 0000000000000000000000000000000000000000..045c148a2971126dc42568b5ea5cded955a98891 --- /dev/null +++ b/src/size_hint.rs @@ -0,0 +1,124 @@ +//! Utilities for working with and combining the results of +//! [`Arbitrary::size_hint`][crate::Arbitrary::size_hint]. + +/// Protects against potential infinite recursion when calculating size hints +/// due to indirect type recursion. +/// +/// When the depth is not too deep, calls `f` with `depth + 1` to calculate the +/// size hint. +/// +/// Otherwise, returns the default size hint: `(0, None)`. +#[inline] +pub fn recursion_guard( + depth: usize, + f: impl FnOnce(usize) -> (usize, Option), +) -> (usize, Option) { + const MAX_DEPTH: usize = 20; + if depth > MAX_DEPTH { + (0, None) + } else { + f(depth + 1) + } +} + +/// Take the sum of the `lhs` and `rhs` size hints. +#[inline] +pub fn and(lhs: (usize, Option), rhs: (usize, Option)) -> (usize, Option) { + let lower = lhs.0 + rhs.0; + let upper = lhs.1.and_then(|lhs| rhs.1.map(|rhs| lhs + rhs)); + (lower, upper) +} + +/// Take the sum of all of the given size hints. +/// +/// If `hints` is empty, returns `(0, Some(0))`, aka the size of consuming +/// nothing. +#[inline] +pub fn and_all(hints: &[(usize, Option)]) -> (usize, Option) { + hints.iter().copied().fold((0, Some(0)), and) +} + +/// Take the minimum of the lower bounds and maximum of the upper bounds in the +/// `lhs` and `rhs` size hints. +#[inline] +pub fn or(lhs: (usize, Option), rhs: (usize, Option)) -> (usize, Option) { + let lower = std::cmp::min(lhs.0, rhs.0); + let upper = lhs + .1 + .and_then(|lhs| rhs.1.map(|rhs| std::cmp::max(lhs, rhs))); + (lower, upper) +} + +/// Take the maximum of the `lhs` and `rhs` size hints. +/// +/// If `hints` is empty, returns `(0, Some(0))`, aka the size of consuming +/// nothing. +#[inline] +pub fn or_all(hints: &[(usize, Option)]) -> (usize, Option) { + if let Some(head) = hints.first().copied() { + hints[1..].iter().copied().fold(head, or) + } else { + (0, Some(0)) + } +} + +#[cfg(test)] +mod tests { + #[test] + fn and() { + assert_eq!((5, Some(5)), super::and((2, Some(2)), (3, Some(3)))); + assert_eq!((5, None), super::and((2, Some(2)), (3, None))); + assert_eq!((5, None), super::and((2, None), (3, Some(3)))); + assert_eq!((5, None), super::and((2, None), (3, None))); + } + + #[test] + fn or() { + assert_eq!((2, Some(3)), super::or((2, Some(2)), (3, Some(3)))); + assert_eq!((2, None), super::or((2, Some(2)), (3, None))); + assert_eq!((2, None), super::or((2, None), (3, Some(3)))); + assert_eq!((2, None), super::or((2, None), (3, None))); + } + + #[test] + fn and_all() { + assert_eq!((0, Some(0)), super::and_all(&[])); + assert_eq!( + (7, Some(7)), + super::and_all(&[(1, Some(1)), (2, Some(2)), (4, Some(4))]) + ); + assert_eq!( + (7, None), + super::and_all(&[(1, Some(1)), (2, Some(2)), (4, None)]) + ); + assert_eq!( + (7, None), + super::and_all(&[(1, Some(1)), (2, None), (4, Some(4))]) + ); + assert_eq!( + (7, None), + super::and_all(&[(1, None), (2, Some(2)), (4, Some(4))]) + ); + } + + #[test] + fn or_all() { + assert_eq!((0, Some(0)), super::or_all(&[])); + assert_eq!( + (1, Some(4)), + super::or_all(&[(1, Some(1)), (2, Some(2)), (4, Some(4))]) + ); + assert_eq!( + (1, None), + super::or_all(&[(1, Some(1)), (2, Some(2)), (4, None)]) + ); + assert_eq!( + (1, None), + super::or_all(&[(1, Some(1)), (2, None), (4, Some(4))]) + ); + assert_eq!( + (1, None), + super::or_all(&[(1, None), (2, Some(2)), (4, Some(4))]) + ); + } +} diff --git a/src/unstructured.rs b/src/unstructured.rs new file mode 100644 index 0000000000000000000000000000000000000000..0bfdff288170444fb39f33d5d36e4c2e272a284a --- /dev/null +++ b/src/unstructured.rs @@ -0,0 +1,1031 @@ +// Copyright © 2019 The Rust Fuzz Project Developers. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Wrappers around raw, unstructured bytes. + +use crate::{Arbitrary, Error, Result}; +use std::marker::PhantomData; +use std::ops::ControlFlow; +use std::{mem, ops}; + +/// A source of unstructured data. +/// +/// An `Unstructured` helps `Arbitrary` implementations interpret raw data +/// (typically provided by a fuzzer) as a "DNA string" that describes how to +/// construct the `Arbitrary` type. The goal is that a small change to the "DNA +/// string" (the raw data wrapped by an `Unstructured`) results in a small +/// change to the generated `Arbitrary` instance. This helps a fuzzer +/// efficiently explore the `Arbitrary`'s input space. +/// +/// `Unstructured` is deterministic: given the same raw data, the same series of +/// API calls will return the same results (modulo system resource constraints, +/// like running out of memory). However, `Unstructured` does not guarantee +/// anything beyond that: it makes not guarantee that it will yield bytes from +/// the underlying data in any particular order. +/// +/// You shouldn't generally need to use an `Unstructured` unless you are writing +/// a custom `Arbitrary` implementation by hand, instead of deriving it. Mostly, +/// you should just be passing it through to nested `Arbitrary::arbitrary` +/// calls. +/// +/// # Example +/// +/// Imagine you were writing a color conversion crate. You might want to write +/// fuzz tests that take a random RGB color and assert various properties, run +/// functions and make sure nothing panics, etc. +/// +/// Below is what translating the fuzzer's raw input into an `Unstructured` and +/// using that to generate an arbitrary RGB color might look like: +/// +/// ``` +/// # #[cfg(feature = "derive")] fn foo() { +/// use arbitrary::{Arbitrary, Unstructured}; +/// +/// /// An RGB color. +/// #[derive(Arbitrary)] +/// pub struct Rgb { +/// r: u8, +/// g: u8, +/// b: u8, +/// } +/// +/// // Get the raw bytes from the fuzzer. +/// # let get_input_from_fuzzer = || &[]; +/// let raw_data: &[u8] = get_input_from_fuzzer(); +/// +/// // Wrap it in an `Unstructured`. +/// let mut unstructured = Unstructured::new(raw_data); +/// +/// // Generate an `Rgb` color and run our checks. +/// if let Ok(rgb) = Rgb::arbitrary(&mut unstructured) { +/// # let run_my_color_conversion_checks = |_| {}; +/// run_my_color_conversion_checks(rgb); +/// } +/// # } +/// ``` +pub struct Unstructured<'a> { + data: &'a [u8], +} + +impl<'a> Unstructured<'a> { + /// Create a new `Unstructured` from the given raw data. + /// + /// # Example + /// + /// ``` + /// use arbitrary::Unstructured; + /// + /// let u = Unstructured::new(&[1, 2, 3, 4]); + /// ``` + pub fn new(data: &'a [u8]) -> Self { + Unstructured { data } + } + + /// Get the number of remaining bytes of underlying data that are still + /// available. + /// + /// # Example + /// + /// ``` + /// use arbitrary::{Arbitrary, Unstructured}; + /// + /// let mut u = Unstructured::new(&[1, 2, 3]); + /// + /// // Initially have three bytes of data. + /// assert_eq!(u.len(), 3); + /// + /// // Generating a `bool` consumes one byte from the underlying data, so + /// // we are left with two bytes afterwards. + /// let _ = bool::arbitrary(&mut u); + /// assert_eq!(u.len(), 2); + /// ``` + #[inline] + pub fn len(&self) -> usize { + self.data.len() + } + + /// Is the underlying unstructured data exhausted? + /// + /// `unstructured.is_empty()` is the same as `unstructured.len() == 0`. + /// + /// # Example + /// + /// ``` + /// use arbitrary::{Arbitrary, Unstructured}; + /// + /// let mut u = Unstructured::new(&[1, 2, 3, 4]); + /// + /// // Initially, we are not empty. + /// assert!(!u.is_empty()); + /// + /// // Generating a `u32` consumes all four bytes of the underlying data, so + /// // we become empty afterwards. + /// let _ = u32::arbitrary(&mut u); + /// assert!(u.is_empty()); + /// ``` + #[inline] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Generate an arbitrary instance of `A`. + /// + /// This is simply a helper method that is equivalent to `::arbitrary(self)`. This helper is a little bit more concise, + /// and can be used in situations where Rust's type inference will figure + /// out what `A` should be. + /// + /// # Example + /// + /// ``` + /// # #[cfg(feature="derive")] fn foo() -> arbitrary::Result<()> { + /// use arbitrary::{Arbitrary, Unstructured}; + /// + /// #[derive(Arbitrary)] + /// struct MyType { + /// // ... + /// } + /// + /// fn do_stuff(value: MyType) { + /// # let _ = value; + /// // ... + /// } + /// + /// let mut u = Unstructured::new(&[1, 2, 3, 4]); + /// + /// // Rust's type inference can figure out that `value` should be of type + /// // `MyType` here: + /// let value = u.arbitrary()?; + /// do_stuff(value); + /// # Ok(()) } + /// ``` + pub fn arbitrary(&mut self) -> Result + where + A: Arbitrary<'a>, + { + >::arbitrary(self) + } + + /// Get the number of elements to insert when building up a collection of + /// arbitrary `ElementType`s. + /// + /// This uses the [`::size_hint`][crate::Arbitrary::size_hint] method to smartly + /// choose a length such that we most likely have enough underlying bytes to + /// construct that many arbitrary `ElementType`s. + /// + /// This should only be called within an `Arbitrary` implementation. + /// + /// # Example + /// + /// ``` + /// use arbitrary::{Arbitrary, Result, Unstructured}; + /// # pub struct MyCollection { _t: std::marker::PhantomData } + /// # impl MyCollection { + /// # pub fn with_capacity(capacity: usize) -> Self { MyCollection { _t: std::marker::PhantomData } } + /// # pub fn insert(&mut self, element: T) {} + /// # } + /// + /// impl<'a, T> Arbitrary<'a> for MyCollection + /// where + /// T: Arbitrary<'a>, + /// { + /// fn arbitrary(u: &mut Unstructured<'a>) -> Result { + /// // Get the number of `T`s we should insert into our collection. + /// let len = u.arbitrary_len::()?; + /// + /// // And then create a collection of that length! + /// let mut my_collection = MyCollection::with_capacity(len); + /// for _ in 0..len { + /// let element = T::arbitrary(u)?; + /// my_collection.insert(element); + /// } + /// + /// Ok(my_collection) + /// } + /// } + /// ``` + pub fn arbitrary_len(&mut self) -> Result + where + ElementType: Arbitrary<'a>, + { + let byte_size = self.arbitrary_byte_size()?; + let (lower, upper) = ::size_hint(0); + let elem_size = upper.unwrap_or(lower * 2); + let elem_size = std::cmp::max(1, elem_size); + Ok(byte_size / elem_size) + } + + fn arbitrary_byte_size(&mut self) -> Result { + if self.data.is_empty() { + Ok(0) + } else if self.data.len() == 1 { + self.data = &[]; + Ok(0) + } else { + // Take lengths from the end of the data, since the `libFuzzer` folks + // found that this lets fuzzers more efficiently explore the input + // space. + // + // https://github.com/rust-fuzz/libfuzzer-sys/blob/0c450753/libfuzzer/utils/FuzzedDataProvider.h#L92-L97 + + // We only consume as many bytes as necessary to cover the entire + // range of the byte string. + // Note: We cast to u64 so we don't overflow when checking std::u32::MAX + 4 on 32-bit archs + let len = if self.data.len() as u64 <= std::u8::MAX as u64 + 1 { + let bytes = 1; + let max_size = self.data.len() - bytes; + let (rest, for_size) = self.data.split_at(max_size); + self.data = rest; + Self::int_in_range_impl(0..=max_size as u8, for_size.iter().copied())?.0 as usize + } else if self.data.len() as u64 <= std::u16::MAX as u64 + 2 { + let bytes = 2; + let max_size = self.data.len() - bytes; + let (rest, for_size) = self.data.split_at(max_size); + self.data = rest; + Self::int_in_range_impl(0..=max_size as u16, for_size.iter().copied())?.0 as usize + } else if self.data.len() as u64 <= std::u32::MAX as u64 + 4 { + let bytes = 4; + let max_size = self.data.len() - bytes; + let (rest, for_size) = self.data.split_at(max_size); + self.data = rest; + Self::int_in_range_impl(0..=max_size as u32, for_size.iter().copied())?.0 as usize + } else { + let bytes = 8; + let max_size = self.data.len() - bytes; + let (rest, for_size) = self.data.split_at(max_size); + self.data = rest; + Self::int_in_range_impl(0..=max_size as u64, for_size.iter().copied())?.0 as usize + }; + + Ok(len) + } + } + + /// Generate an integer within the given range. + /// + /// Do not use this to generate the size of a collection. Use + /// `arbitrary_len` instead. + /// + /// # Panics + /// + /// Panics if `range.start > range.end`. That is, the given range must be + /// non-empty. + /// + /// # Example + /// + /// ``` + /// use arbitrary::{Arbitrary, Unstructured}; + /// + /// let mut u = Unstructured::new(&[1, 2, 3, 4]); + /// + /// let x: i32 = u.int_in_range(-5_000..=-1_000) + /// .expect("constructed `u` with enough bytes to generate an `i32`"); + /// + /// assert!(-5_000 <= x); + /// assert!(x <= -1_000); + /// ``` + pub fn int_in_range(&mut self, range: ops::RangeInclusive) -> Result + where + T: Int, + { + let (result, bytes_consumed) = Self::int_in_range_impl(range, self.data.iter().cloned())?; + self.data = &self.data[bytes_consumed..]; + Ok(result) + } + + fn int_in_range_impl( + range: ops::RangeInclusive, + mut bytes: impl Iterator, + ) -> Result<(T, usize)> + where + T: Int, + { + let start = *range.start(); + let end = *range.end(); + assert!( + start <= end, + "`arbitrary::Unstructured::int_in_range` requires a non-empty range" + ); + + // When there is only one possible choice, don't waste any entropy from + // the underlying data. + if start == end { + return Ok((start, 0)); + } + + // From here on out we work with the unsigned representation. All of the + // operations performed below work out just as well whether or not `T` + // is a signed or unsigned integer. + let start = start.to_unsigned(); + let end = end.to_unsigned(); + + let delta = end.wrapping_sub(start); + debug_assert_ne!(delta, T::Unsigned::ZERO); + + // Compute an arbitrary integer offset from the start of the range. We + // do this by consuming `size_of(T)` bytes from the input to create an + // arbitrary integer and then clamping that int into our range bounds + // with a modulo operation. + let mut arbitrary_int = T::Unsigned::ZERO; + let mut bytes_consumed: usize = 0; + + while (bytes_consumed < mem::size_of::()) + && (delta >> T::Unsigned::from_usize(bytes_consumed * 8)) > T::Unsigned::ZERO + { + let byte = match bytes.next() { + None => break, + Some(b) => b, + }; + bytes_consumed += 1; + + // Combine this byte into our arbitrary integer, but avoid + // overflowing the shift for `u8` and `i8`. + arbitrary_int = if mem::size_of::() == 1 { + T::Unsigned::from_u8(byte) + } else { + (arbitrary_int << 8) | T::Unsigned::from_u8(byte) + }; + } + + let offset = if delta == T::Unsigned::MAX { + arbitrary_int + } else { + arbitrary_int % (delta.checked_add(T::Unsigned::ONE).unwrap()) + }; + + // Finally, we add `start` to our offset from `start` to get the result + // actual value within the range. + let result = start.wrapping_add(offset); + + // And convert back to our maybe-signed representation. + let result = T::from_unsigned(result); + debug_assert!(*range.start() <= result); + debug_assert!(result <= *range.end()); + + Ok((result, bytes_consumed)) + } + + /// Choose one of the given choices. + /// + /// This should only be used inside of `Arbitrary` implementations. + /// + /// Returns an error if there is not enough underlying data to make a + /// choice or if no choices are provided. + /// + /// # Examples + /// + /// Selecting from an array of choices: + /// + /// ``` + /// use arbitrary::Unstructured; + /// + /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]); + /// let choices = ['a', 'b', 'c', 'd', 'e', 'f', 'g']; + /// + /// let choice = u.choose(&choices).unwrap(); + /// + /// println!("chose {}", choice); + /// ``` + /// + /// An error is returned if no choices are provided: + /// + /// ``` + /// use arbitrary::Unstructured; + /// + /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]); + /// let choices: [char; 0] = []; + /// + /// let result = u.choose(&choices); + /// + /// assert!(result.is_err()); + /// ``` + pub fn choose<'b, T>(&mut self, choices: &'b [T]) -> Result<&'b T> { + let idx = self.choose_index(choices.len())?; + Ok(&choices[idx]) + } + + /// Choose a value in `0..len`. + /// + /// Returns an error if the `len` is zero. + /// + /// # Examples + /// + /// Using Fisher–Yates shuffle shuffle to gerate an arbitrary permutation. + /// + /// [Fisher–Yates shuffle]: https://en.wikipedia.org/wiki/Fisher–Yates_shuffle + /// + /// ``` + /// use arbitrary::Unstructured; + /// + /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]); + /// let mut permutation = ['a', 'b', 'c', 'd', 'e', 'f', 'g']; + /// let mut to_permute = &mut permutation[..]; + /// while to_permute.len() > 1 { + /// let idx = u.choose_index(to_permute.len()).unwrap(); + /// to_permute.swap(0, idx); + /// to_permute = &mut to_permute[1..]; + /// } + /// + /// println!("permutation: {:?}", permutation); + /// ``` + /// + /// An error is returned if the length is zero: + /// + /// ``` + /// use arbitrary::Unstructured; + /// + /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]); + /// let array: [i32; 0] = []; + /// + /// let result = u.choose_index(array.len()); + /// + /// assert!(result.is_err()); + /// ``` + pub fn choose_index(&mut self, len: usize) -> Result { + if len == 0 { + return Err(Error::EmptyChoose); + } + let idx = self.int_in_range(0..=len - 1)?; + Ok(idx) + } + + /// Generate a boolean according to the given ratio. + /// + /// # Panics + /// + /// Panics when the numerator and denominator do not meet these constraints: + /// + /// * `0 < numerator <= denominator` + /// + /// # Example + /// + /// Generate a boolean that is `true` five sevenths of the time: + /// + /// ``` + /// # fn foo() -> arbitrary::Result<()> { + /// use arbitrary::Unstructured; + /// + /// # let my_data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]; + /// let mut u = Unstructured::new(&my_data); + /// + /// if u.ratio(5, 7)? { + /// // Take this branch 5/7 of the time. + /// } + /// # Ok(()) + /// # } + /// ``` + pub fn ratio(&mut self, numerator: T, denominator: T) -> Result + where + T: Int, + { + assert!(T::ZERO < numerator); + assert!(numerator <= denominator); + let x = self.int_in_range(T::ONE..=denominator)?; + Ok(x <= numerator) + } + + /// Fill a `buffer` with bytes from the underlying raw data. + /// + /// This should only be called within an `Arbitrary` implementation. This is + /// a very low-level operation. You should generally prefer calling nested + /// `Arbitrary` implementations like `>::arbitrary` and + /// `String::arbitrary` over using this method directly. + /// + /// If this `Unstructured` does not have enough underlying data to fill the + /// whole `buffer`, it pads the buffer out with zeros. + /// + /// # Example + /// + /// ``` + /// use arbitrary::Unstructured; + /// + /// let mut u = Unstructured::new(&[1, 2, 3, 4]); + /// + /// let mut buf = [0; 2]; + /// + /// assert!(u.fill_buffer(&mut buf).is_ok()); + /// assert_eq!(buf, [1, 2]); + /// + /// assert!(u.fill_buffer(&mut buf).is_ok()); + /// assert_eq!(buf, [3, 4]); + /// + /// assert!(u.fill_buffer(&mut buf).is_ok()); + /// assert_eq!(buf, [0, 0]); + /// ``` + pub fn fill_buffer(&mut self, buffer: &mut [u8]) -> Result<()> { + let n = std::cmp::min(buffer.len(), self.data.len()); + buffer[..n].copy_from_slice(&self.data[..n]); + for byte in buffer[n..].iter_mut() { + *byte = 0; + } + self.data = &self.data[n..]; + Ok(()) + } + + /// Provide `size` bytes from the underlying raw data. + /// + /// This should only be called within an `Arbitrary` implementation. This is + /// a very low-level operation. You should generally prefer calling nested + /// `Arbitrary` implementations like `>::arbitrary` and + /// `String::arbitrary` over using this method directly. + /// + /// # Example + /// + /// ``` + /// use arbitrary::Unstructured; + /// + /// let mut u = Unstructured::new(&[1, 2, 3, 4]); + /// + /// assert!(u.bytes(2).unwrap() == &[1, 2]); + /// assert!(u.bytes(2).unwrap() == &[3, 4]); + /// ``` + pub fn bytes(&mut self, size: usize) -> Result<&'a [u8]> { + if self.data.len() < size { + return Err(Error::NotEnoughData); + } + + let (for_buf, rest) = self.data.split_at(size); + self.data = rest; + Ok(for_buf) + } + + /// Peek at `size` number of bytes of the underlying raw input. + /// + /// Does not consume the bytes, only peeks at them. + /// + /// Returns `None` if there are not `size` bytes left in the underlying raw + /// input. + /// + /// # Example + /// + /// ``` + /// use arbitrary::Unstructured; + /// + /// let u = Unstructured::new(&[1, 2, 3]); + /// + /// assert_eq!(u.peek_bytes(0).unwrap(), []); + /// assert_eq!(u.peek_bytes(1).unwrap(), [1]); + /// assert_eq!(u.peek_bytes(2).unwrap(), [1, 2]); + /// assert_eq!(u.peek_bytes(3).unwrap(), [1, 2, 3]); + /// + /// assert!(u.peek_bytes(4).is_none()); + /// ``` + pub fn peek_bytes(&self, size: usize) -> Option<&'a [u8]> { + self.data.get(..size) + } + + /// Consume all of the rest of the remaining underlying bytes. + /// + /// Returns a slice of all the remaining, unconsumed bytes. + /// + /// # Example + /// + /// ``` + /// use arbitrary::Unstructured; + /// + /// let mut u = Unstructured::new(&[1, 2, 3]); + /// + /// let mut remaining = u.take_rest(); + /// + /// assert_eq!(remaining, [1, 2, 3]); + /// ``` + pub fn take_rest(mut self) -> &'a [u8] { + mem::take(&mut self.data) + } + + /// Provide an iterator over elements for constructing a collection + /// + /// This is useful for implementing [`Arbitrary::arbitrary`] on collections + /// since the implementation is simply `u.arbitrary_iter()?.collect()` + pub fn arbitrary_iter<'b, ElementType: Arbitrary<'a>>( + &'b mut self, + ) -> Result> { + Ok(ArbitraryIter { + u: &mut *self, + _marker: PhantomData, + }) + } + + /// Provide an iterator over elements for constructing a collection from + /// all the remaining bytes. + /// + /// This is useful for implementing [`Arbitrary::arbitrary_take_rest`] on collections + /// since the implementation is simply `u.arbitrary_take_rest_iter()?.collect()` + pub fn arbitrary_take_rest_iter>( + self, + ) -> Result> { + let (lower, upper) = ElementType::size_hint(0); + + let elem_size = upper.unwrap_or(lower * 2); + let elem_size = std::cmp::max(1, elem_size); + let size = self.len() / elem_size; + Ok(ArbitraryTakeRestIter { + size, + u: Some(self), + _marker: PhantomData, + }) + } + + /// Call the given function an arbitrary number of times. + /// + /// The function is given this `Unstructured` so that it can continue to + /// generate arbitrary data and structures. + /// + /// You may optionaly specify minimum and maximum bounds on the number of + /// times the function is called. + /// + /// You may break out of the loop early by returning + /// `Ok(std::ops::ControlFlow::Break)`. To continue the loop, return + /// `Ok(std::ops::ControlFlow::Continue)`. + /// + /// # Panics + /// + /// Panics if `min > max`. + /// + /// # Example + /// + /// Call a closure that generates an arbitrary type inside a context an + /// arbitrary number of times: + /// + /// ``` + /// use arbitrary::{Result, Unstructured}; + /// use std::ops::ControlFlow; + /// + /// enum Type { + /// /// A boolean type. + /// Bool, + /// + /// /// An integer type. + /// Int, + /// + /// /// A list of the `i`th type in this type's context. + /// List(usize), + /// } + /// + /// fn arbitrary_types_context(u: &mut Unstructured) -> Result> { + /// let mut context = vec![]; + /// + /// u.arbitrary_loop(Some(10), Some(20), |u| { + /// let num_choices = if context.is_empty() { + /// 2 + /// } else { + /// 3 + /// }; + /// let ty = match u.int_in_range::(1..=num_choices)? { + /// 1 => Type::Bool, + /// 2 => Type::Int, + /// 3 => Type::List(u.int_in_range(0..=context.len() - 1)?), + /// _ => unreachable!(), + /// }; + /// context.push(ty); + /// Ok(ControlFlow::Continue(())) + /// })?; + /// + /// // The number of loop iterations are constrained by the min/max + /// // bounds that we provided. + /// assert!(context.len() >= 10); + /// assert!(context.len() <= 20); + /// + /// Ok(context) + /// } + /// ``` + pub fn arbitrary_loop( + &mut self, + min: Option, + max: Option, + mut f: impl FnMut(&mut Self) -> Result>, + ) -> Result<()> { + let min = min.unwrap_or(0); + let max = max.unwrap_or(u32::MAX); + + for _ in 0..self.int_in_range(min..=max)? { + match f(self)? { + ControlFlow::Continue(_) => continue, + ControlFlow::Break(_) => break, + } + } + + Ok(()) + } +} + +/// Utility iterator produced by [`Unstructured::arbitrary_iter`] +pub struct ArbitraryIter<'a, 'b, ElementType> { + u: &'b mut Unstructured<'a>, + _marker: PhantomData, +} + +impl<'a, 'b, ElementType: Arbitrary<'a>> Iterator for ArbitraryIter<'a, 'b, ElementType> { + type Item = Result; + fn next(&mut self) -> Option> { + let keep_going = self.u.arbitrary().unwrap_or(false); + if keep_going { + Some(Arbitrary::arbitrary(self.u)) + } else { + None + } + } +} + +/// Utility iterator produced by [`Unstructured::arbitrary_take_rest_iter`] +pub struct ArbitraryTakeRestIter<'a, ElementType> { + u: Option>, + size: usize, + _marker: PhantomData, +} + +impl<'a, ElementType: Arbitrary<'a>> Iterator for ArbitraryTakeRestIter<'a, ElementType> { + type Item = Result; + fn next(&mut self) -> Option> { + if let Some(mut u) = self.u.take() { + if self.size == 1 { + Some(Arbitrary::arbitrary_take_rest(u)) + } else if self.size == 0 { + None + } else { + self.size -= 1; + let ret = Arbitrary::arbitrary(&mut u); + self.u = Some(u); + Some(ret) + } + } else { + None + } + } +} + +/// A trait that is implemented for all of the primitive integers: +/// +/// * `u8` +/// * `u16` +/// * `u32` +/// * `u64` +/// * `u128` +/// * `usize` +/// * `i8` +/// * `i16` +/// * `i32` +/// * `i64` +/// * `i128` +/// * `isize` +/// +/// Don't implement this trait yourself. +pub trait Int: + Copy + + std::fmt::Debug + + PartialOrd + + Ord + + ops::Sub + + ops::Rem + + ops::Shr + + ops::Shl + + ops::BitOr +{ + #[doc(hidden)] + type Unsigned: Int; + + #[doc(hidden)] + const ZERO: Self; + + #[doc(hidden)] + const ONE: Self; + + #[doc(hidden)] + const MAX: Self; + + #[doc(hidden)] + fn from_u8(b: u8) -> Self; + + #[doc(hidden)] + fn from_usize(u: usize) -> Self; + + #[doc(hidden)] + fn checked_add(self, rhs: Self) -> Option; + + #[doc(hidden)] + fn wrapping_add(self, rhs: Self) -> Self; + + #[doc(hidden)] + fn wrapping_sub(self, rhs: Self) -> Self; + + #[doc(hidden)] + fn to_unsigned(self) -> Self::Unsigned; + + #[doc(hidden)] + fn from_unsigned(unsigned: Self::Unsigned) -> Self; +} + +macro_rules! impl_int { + ( $( $ty:ty : $unsigned_ty: ty ; )* ) => { + $( + impl Int for $ty { + type Unsigned = $unsigned_ty; + + const ZERO: Self = 0; + + const ONE: Self = 1; + + const MAX: Self = Self::MAX; + + fn from_u8(b: u8) -> Self { + b as Self + } + + fn from_usize(u: usize) -> Self { + u as Self + } + + fn checked_add(self, rhs: Self) -> Option { + <$ty>::checked_add(self, rhs) + } + + fn wrapping_add(self, rhs: Self) -> Self { + <$ty>::wrapping_add(self, rhs) + } + + fn wrapping_sub(self, rhs: Self) -> Self { + <$ty>::wrapping_sub(self, rhs) + } + + fn to_unsigned(self) -> Self::Unsigned { + self as $unsigned_ty + } + + fn from_unsigned(unsigned: $unsigned_ty) -> Self { + unsigned as Self + } + } + )* + } +} + +impl_int! { + u8: u8; + u16: u16; + u32: u32; + u64: u64; + u128: u128; + usize: usize; + i8: u8; + i16: u16; + i32: u32; + i64: u64; + i128: u128; + isize: usize; +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_byte_size() { + let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 6]); + // Should take one byte off the end + assert_eq!(u.arbitrary_byte_size().unwrap(), 6); + assert_eq!(u.len(), 9); + let mut v = vec![]; + v.resize(260, 0); + v.push(1); + v.push(4); + let mut u = Unstructured::new(&v); + // Should read two bytes off the end + assert_eq!(u.arbitrary_byte_size().unwrap(), 0x104); + assert_eq!(u.len(), 260); + } + + #[test] + fn int_in_range_of_one() { + let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 6]); + let x = u.int_in_range(0..=0).unwrap(); + assert_eq!(x, 0); + let choice = *u.choose(&[42]).unwrap(); + assert_eq!(choice, 42) + } + + #[test] + fn int_in_range_uses_minimal_amount_of_bytes() { + let mut u = Unstructured::new(&[1, 2]); + assert_eq!(1, u.int_in_range::(0..=u8::MAX).unwrap()); + assert_eq!(u.len(), 1); + + let mut u = Unstructured::new(&[1, 2]); + assert_eq!(1, u.int_in_range::(0..=u8::MAX as u32).unwrap()); + assert_eq!(u.len(), 1); + + let mut u = Unstructured::new(&[1]); + assert_eq!(1, u.int_in_range::(0..=u8::MAX as u32 + 1).unwrap()); + assert!(u.is_empty()); + } + + #[test] + fn int_in_range_in_bounds() { + for input in u8::MIN..=u8::MAX { + let input = [input]; + + let mut u = Unstructured::new(&input); + let x = u.int_in_range(1..=u8::MAX).unwrap(); + assert_ne!(x, 0); + + let mut u = Unstructured::new(&input); + let x = u.int_in_range(0..=u8::MAX - 1).unwrap(); + assert_ne!(x, u8::MAX); + } + } + + #[test] + fn int_in_range_covers_unsigned_range() { + // Test that we generate all values within the range given to + // `int_in_range`. + + let mut full = [false; u8::MAX as usize + 1]; + let mut no_zero = [false; u8::MAX as usize]; + let mut no_max = [false; u8::MAX as usize]; + let mut narrow = [false; 10]; + + for input in u8::MIN..=u8::MAX { + let input = [input]; + + let mut u = Unstructured::new(&input); + let x = u.int_in_range(0..=u8::MAX).unwrap(); + full[x as usize] = true; + + let mut u = Unstructured::new(&input); + let x = u.int_in_range(1..=u8::MAX).unwrap(); + no_zero[x as usize - 1] = true; + + let mut u = Unstructured::new(&input); + let x = u.int_in_range(0..=u8::MAX - 1).unwrap(); + no_max[x as usize] = true; + + let mut u = Unstructured::new(&input); + let x = u.int_in_range(100..=109).unwrap(); + narrow[x as usize - 100] = true; + } + + for (i, covered) in full.iter().enumerate() { + assert!(covered, "full[{}] should have been generated", i); + } + for (i, covered) in no_zero.iter().enumerate() { + assert!(covered, "no_zero[{}] should have been generated", i); + } + for (i, covered) in no_max.iter().enumerate() { + assert!(covered, "no_max[{}] should have been generated", i); + } + for (i, covered) in narrow.iter().enumerate() { + assert!(covered, "narrow[{}] should have been generated", i); + } + } + + #[test] + fn int_in_range_covers_signed_range() { + // Test that we generate all values within the range given to + // `int_in_range`. + + let mut full = [false; u8::MAX as usize + 1]; + let mut no_min = [false; u8::MAX as usize]; + let mut no_max = [false; u8::MAX as usize]; + let mut narrow = [false; 21]; + + let abs_i8_min: isize = 128; + + for input in 0..=u8::MAX { + let input = [input]; + + let mut u = Unstructured::new(&input); + let x = u.int_in_range(i8::MIN..=i8::MAX).unwrap(); + full[(x as isize + abs_i8_min) as usize] = true; + + let mut u = Unstructured::new(&input); + let x = u.int_in_range(i8::MIN + 1..=i8::MAX).unwrap(); + no_min[(x as isize + abs_i8_min - 1) as usize] = true; + + let mut u = Unstructured::new(&input); + let x = u.int_in_range(i8::MIN..=i8::MAX - 1).unwrap(); + no_max[(x as isize + abs_i8_min) as usize] = true; + + let mut u = Unstructured::new(&input); + let x = u.int_in_range(-10..=10).unwrap(); + narrow[(x as isize + 10) as usize] = true; + } + + for (i, covered) in full.iter().enumerate() { + assert!(covered, "full[{}] should have been generated", i); + } + for (i, covered) in no_min.iter().enumerate() { + assert!(covered, "no_min[{}] should have been generated", i); + } + for (i, covered) in no_max.iter().enumerate() { + assert!(covered, "no_max[{}] should have been generated", i); + } + for (i, covered) in narrow.iter().enumerate() { + assert!(covered, "narrow[{}] should have been generated", i); + } + } +} diff --git a/tests/bound.rs b/tests/bound.rs new file mode 100644 index 0000000000000000000000000000000000000000..7a772ac71561c60f8cb5878d14ef9631f8ea6099 --- /dev/null +++ b/tests/bound.rs @@ -0,0 +1,142 @@ +#![cfg(feature = "derive")] + +use arbitrary::{Arbitrary, Unstructured}; + +fn arbitrary_from<'a, T: Arbitrary<'a>>(input: &'a [u8]) -> T { + let mut buf = Unstructured::new(input); + T::arbitrary(&mut buf).expect("can create arbitrary instance OK") +} + +/// This wrapper trait *implies* `Arbitrary`, but the compiler isn't smart enough to work that out +/// so when using this wrapper we *must* opt-out of the auto-generated `T: Arbitrary` bounds. +pub trait WrapperTrait: for<'a> Arbitrary<'a> {} + +impl WrapperTrait for u32 {} + +#[derive(Arbitrary)] +#[arbitrary(bound = "T: WrapperTrait")] +struct GenericSingleBound { + t: T, +} + +#[test] +fn single_bound() { + let v: GenericSingleBound = arbitrary_from(&[0, 0, 0, 0]); + assert_eq!(v.t, 0); +} + +#[derive(Arbitrary)] +#[arbitrary(bound = "T: WrapperTrait, U: WrapperTrait")] +struct GenericMultipleBoundsSingleAttribute { + t: T, + u: U, +} + +#[test] +fn multiple_bounds_single_attribute() { + let v: GenericMultipleBoundsSingleAttribute = + arbitrary_from(&[1, 0, 0, 0, 2, 0, 0, 0]); + assert_eq!(v.t, 1); + assert_eq!(v.u, 2); +} + +#[derive(Arbitrary)] +#[arbitrary(bound = "T: WrapperTrait")] +#[arbitrary(bound = "U: Default")] +struct GenericMultipleArbitraryAttributes { + t: T, + #[arbitrary(default)] + u: U, +} + +#[test] +fn multiple_arbitrary_attributes() { + let v: GenericMultipleArbitraryAttributes = arbitrary_from(&[1, 0, 0, 0]); + assert_eq!(v.t, 1); + assert_eq!(v.u, 0); +} + +#[derive(Arbitrary)] +#[arbitrary(bound = "T: WrapperTrait", bound = "U: Default")] +struct GenericMultipleBoundAttributes { + t: T, + #[arbitrary(default)] + u: U, +} + +#[test] +fn multiple_bound_attributes() { + let v: GenericMultipleBoundAttributes = arbitrary_from(&[1, 0, 0, 0]); + assert_eq!(v.t, 1); + assert_eq!(v.u, 0); +} + +#[derive(Arbitrary)] +#[arbitrary(bound = "T: WrapperTrait", bound = "U: Default")] +#[arbitrary(bound = "V: WrapperTrait, W: Default")] +struct GenericMultipleArbitraryAndBoundAttributes< + T: WrapperTrait, + U: Default, + V: WrapperTrait, + W: Default, +> { + t: T, + #[arbitrary(default)] + u: U, + v: V, + #[arbitrary(default)] + w: W, +} + +#[test] +fn multiple_arbitrary_and_bound_attributes() { + let v: GenericMultipleArbitraryAndBoundAttributes = + arbitrary_from(&[1, 0, 0, 0, 2, 0, 0, 0]); + assert_eq!(v.t, 1); + assert_eq!(v.u, 0); + assert_eq!(v.v, 2); + assert_eq!(v.w, 0); +} + +#[derive(Arbitrary)] +#[arbitrary(bound = "T: Default")] +struct GenericDefault { + #[arbitrary(default)] + x: T, +} + +#[test] +fn default_bound() { + // We can write a generic func without any `Arbitrary` bound. + fn generic_default() -> GenericDefault { + arbitrary_from(&[]) + } + + assert_eq!(generic_default::().x, 0); + assert_eq!(generic_default::().x, String::new()); + assert_eq!(generic_default::>().x, Vec::new()); +} + +#[derive(Arbitrary)] +#[arbitrary()] +struct EmptyArbitraryAttribute { + t: u32, +} + +#[test] +fn empty_arbitrary_attribute() { + let v: EmptyArbitraryAttribute = arbitrary_from(&[1, 0, 0, 0]); + assert_eq!(v.t, 1); +} + +#[derive(Arbitrary)] +#[arbitrary(bound = "")] +struct EmptyBoundAttribute { + t: u32, +} + +#[test] +fn empty_bound_attribute() { + let v: EmptyBoundAttribute = arbitrary_from(&[1, 0, 0, 0]); + assert_eq!(v.t, 1); +} diff --git a/tests/derive.rs b/tests/derive.rs new file mode 100644 index 0000000000000000000000000000000000000000..f29d227b890046d5ffd4e3db6a52a30da399af8e --- /dev/null +++ b/tests/derive.rs @@ -0,0 +1,278 @@ +#![cfg(feature = "derive")] +// Various structs/fields that we are deriving `Arbitrary` for aren't actually +// used except to exercise the derive. +#![allow(dead_code)] + +use arbitrary::*; + +fn arbitrary_from<'a, T: Arbitrary<'a>>(input: &'a [u8]) -> T { + let mut buf = Unstructured::new(input); + T::arbitrary(&mut buf).expect("can create arbitrary instance OK") +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq, Arbitrary)] +pub struct Rgb { + pub r: u8, + pub g: u8, + pub b: u8, +} + +#[test] +fn struct_with_named_fields() { + let rgb: Rgb = arbitrary_from(&[4, 5, 6]); + assert_eq!(rgb.r, 4); + assert_eq!(rgb.g, 5); + assert_eq!(rgb.b, 6); + + assert_eq!((3, Some(3)), ::size_hint(0)); +} + +#[derive(Copy, Clone, Debug, Arbitrary)] +struct MyTupleStruct(u8, bool); + +#[test] +fn tuple_struct() { + let s: MyTupleStruct = arbitrary_from(&[43, 42]); + assert_eq!(s.0, 43); + assert_eq!(s.1, false); + + let s: MyTupleStruct = arbitrary_from(&[42, 43]); + assert_eq!(s.0, 42); + assert_eq!(s.1, true); + + assert_eq!((2, Some(2)), ::size_hint(0)); +} + +#[derive(Clone, Debug, Arbitrary)] +struct EndingInVec(u8, bool, u32, Vec); +#[derive(Clone, Debug, Arbitrary)] +struct EndingInString(u8, bool, u32, String); + +#[test] +fn test_take_rest() { + let bytes = [1, 1, 1, 2, 3, 4, 5, 6, 7, 8]; + let s1 = EndingInVec::arbitrary_take_rest(Unstructured::new(&bytes)).unwrap(); + let s2 = EndingInString::arbitrary_take_rest(Unstructured::new(&bytes)).unwrap(); + assert_eq!(s1.0, 1); + assert_eq!(s2.0, 1); + assert_eq!(s1.1, true); + assert_eq!(s2.1, true); + assert_eq!(s1.2, 0x4030201); + assert_eq!(s2.2, 0x4030201); + assert_eq!(s1.3, vec![0x605, 0x807]); + assert_eq!(s2.3, "\x05\x06\x07\x08"); +} + +#[derive(Copy, Clone, Debug, Arbitrary)] +enum MyEnum { + Unit, + Tuple(u8, u16), + Struct { a: u32, b: (bool, u64) }, +} + +#[test] +fn derive_enum() { + let mut raw = vec![ + // The choice of which enum variant takes 4 bytes. + 1, 2, 3, 4, + // And then we need up to 13 bytes for creating `MyEnum::Struct`, the + // largest variant. + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + ]; + + let mut saw_unit = false; + let mut saw_tuple = false; + let mut saw_struct = false; + + for i in 0..=255 { + // Choose different variants each iteration. + for el in &mut raw[..4] { + *el = i; + } + + let e: MyEnum = arbitrary_from(&raw); + + match e { + MyEnum::Unit => { + saw_unit = true; + } + MyEnum::Tuple(a, b) => { + saw_tuple = true; + assert_eq!(a, arbitrary_from(&raw[4..5])); + assert_eq!(b, arbitrary_from(&raw[5..])); + } + MyEnum::Struct { a, b } => { + saw_struct = true; + assert_eq!(a, arbitrary_from(&raw[4..8])); + assert_eq!(b, arbitrary_from(&raw[8..])); + } + } + } + + assert!(saw_unit); + assert!(saw_tuple); + assert!(saw_struct); + + assert_eq!((4, Some(17)), ::size_hint(0)); +} + +#[derive(Arbitrary, Debug)] +enum RecursiveTree { + Leaf, + Node { + left: Box, + right: Box, + }, +} + +#[test] +fn recursive() { + let raw = vec![1, 2, 3, 4, 5, 6, 7, 8, 9]; + let _rec: RecursiveTree = arbitrary_from(&raw); + + let (lower, upper) = ::size_hint(0); + assert_eq!(lower, 4, "need a u32 for the discriminant at minimum"); + assert!( + upper.is_none(), + "potentially infinitely recursive, so no upper bound" + ); +} + +#[derive(Arbitrary, Debug)] +struct Generic { + inner: T, +} + +#[test] +fn generics() { + let raw = vec![1, 2, 3, 4, 5, 6, 7, 8, 9]; + let gen: Generic = arbitrary_from(&raw); + assert!(gen.inner); + + let (lower, upper) = as Arbitrary>::size_hint(0); + assert_eq!(lower, 4); + assert_eq!(upper, Some(4)); +} + +#[derive(Arbitrary, Debug)] +struct OneLifetime<'a> { + alpha: &'a str, +} + +#[test] +fn one_lifetime() { + // Last byte is used for length + let raw: Vec = vec![97, 98, 99, 100, 3]; + let lifetime: OneLifetime = arbitrary_from(&raw); + assert_eq!("abc", lifetime.alpha); + + let (lower, upper) = ::size_hint(0); + assert_eq!(lower, 0); + assert_eq!(upper, None); +} + +#[derive(Arbitrary, Debug)] +struct TwoLifetimes<'a, 'b> { + alpha: &'a str, + beta: &'b str, +} + +#[test] +fn two_lifetimes() { + // Last byte is used for length + let raw: Vec = vec![97, 98, 99, 100, 101, 102, 103, 3]; + let lifetime: TwoLifetimes = arbitrary_from(&raw); + assert_eq!("abc", lifetime.alpha); + assert_eq!("def", lifetime.beta); + + let (lower, upper) = ::size_hint(0); + assert_eq!(lower, 0); + assert_eq!(upper, None); +} + +#[test] +fn recursive_and_empty_input() { + // None of the following derives should result in a stack overflow. See + // https://github.com/rust-fuzz/arbitrary/issues/107 for details. + + #[derive(Debug, Arbitrary)] + enum Nat { + Succ(Box), + Zero, + } + + let _ = Nat::arbitrary(&mut Unstructured::new(&[])); + + #[derive(Debug, Arbitrary)] + enum Nat2 { + Zero, + Succ(Box), + } + + let _ = Nat2::arbitrary(&mut Unstructured::new(&[])); + + #[derive(Debug, Arbitrary)] + struct Nat3 { + f: Option>, + } + + let _ = Nat3::arbitrary(&mut Unstructured::new(&[])); + + #[derive(Debug, Arbitrary)] + struct Nat4(Option>); + + let _ = Nat4::arbitrary(&mut Unstructured::new(&[])); + + #[derive(Debug, Arbitrary)] + enum Nat5 { + Zero, + Succ { f: Box }, + } + + let _ = Nat5::arbitrary(&mut Unstructured::new(&[])); +} + +#[test] +fn test_field_attributes() { + // A type that DOES NOT implement Arbitrary + #[derive(Debug)] + struct Weight(u8); + + #[derive(Debug, Arbitrary)] + struct Parcel { + #[arbitrary(with = arbitrary_weight)] + weight: Weight, + + #[arbitrary(default)] + width: u8, + + #[arbitrary(value = 2 + 2)] + length: u8, + + height: u8, + + #[arbitrary(with = |u: &mut Unstructured| u.int_in_range(0..=100))] + price: u8, + } + + fn arbitrary_weight(u: &mut Unstructured) -> arbitrary::Result { + u.int_in_range(45..=56).map(Weight) + } + + let parcel: Parcel = arbitrary_from(&[6, 199, 17]); + + // 45 + 6 = 51 + assert_eq!(parcel.weight.0, 51); + + // u8::default() + assert_eq!(parcel.width, 0); + + // 2 + 2 = 4 + assert_eq!(parcel.length, 4); + + // 199 is the 2nd byte used by arbitrary + assert_eq!(parcel.height, 199); + + // 17 is the 3rd byte used by arbitrary + assert_eq!(parcel.price, 17); +} diff --git a/tests/path.rs b/tests/path.rs new file mode 100644 index 0000000000000000000000000000000000000000..c42ec0a11346742385e1c469d42ace36a719986e --- /dev/null +++ b/tests/path.rs @@ -0,0 +1,32 @@ +#![cfg(feature = "derive")] +// Various structs/fields that we are deriving `Arbitrary` for aren't actually +// used except to show off the derive. +#![allow(dead_code)] + +// Regression test for ensuring the derives work without Arbitrary being imported + +#[derive(arbitrary::Arbitrary, Clone, Debug)] +pub struct Struct { + x: u8, + y: u8, +} + +#[derive(arbitrary::Arbitrary, Clone, Debug)] +pub struct Tuple(u8); + +#[derive(arbitrary::Arbitrary, Clone, Debug)] +pub struct Unit(u8); + +#[derive(arbitrary::Arbitrary, Clone, Debug)] +pub enum Enum { + X(u8), + Y(u8), +} + +#[derive(arbitrary::Arbitrary, Clone, Debug)] +struct EndingInVec(u8, bool, u32, Vec); + +#[derive(arbitrary::Arbitrary, Debug)] +struct Generic { + inner: T, +}