aboutsummaryrefslogtreecommitdiff
path: root/instrumentation
diff options
context:
space:
mode:
authorvan Hauser <vh@thc.org>2020-11-06 09:37:14 +0100
committerGitHub <noreply@github.com>2020-11-06 09:37:14 +0100
commit3b799c09cd68bb68b26784261f1fbaa3e737c747 (patch)
treee581c3689d5fe231678464bb6bd48cab75c7db41 /instrumentation
parent5ee63a6e6267e448342ccb28cc8d3c0d34ffc1cd (diff)
parent50c98445fe74b92d2e6ab784def3e8b26a662b36 (diff)
downloadafl++-3b799c09cd68bb68b26784261f1fbaa3e737c747.tar.gz
Merge pull request #594 from AFLplusplus/dev
push to stable
Diffstat (limited to 'instrumentation')
-rw-r--r--instrumentation/COPYING3674
-rw-r--r--instrumentation/LLVMInsTrim.so.cc598
-rw-r--r--instrumentation/MarkNodes.cc481
-rw-r--r--instrumentation/MarkNodes.h12
-rw-r--r--instrumentation/README.cmplog.md42
-rw-r--r--instrumentation/README.ctx.md22
-rw-r--r--instrumentation/README.gcc_plugin.md164
-rw-r--r--instrumentation/README.instrim.md30
-rw-r--r--instrumentation/README.instrument_list.md96
-rw-r--r--instrumentation/README.laf-intel.md56
-rw-r--r--instrumentation/README.llvm.md194
-rw-r--r--instrumentation/README.lto.md290
-rw-r--r--instrumentation/README.neverzero.md35
-rw-r--r--instrumentation/README.ngram.md28
-rw-r--r--instrumentation/README.out_of_line.md21
-rw-r--r--instrumentation/README.persistent_mode.md209
-rw-r--r--instrumentation/README.snapshot.md16
-rw-r--r--instrumentation/SanitizerCoverageLTO.so.cc1613
-rw-r--r--instrumentation/SanitizerCoveragePCGUARD.so.cc1349
-rw-r--r--instrumentation/afl-compiler-rt.o.c1268
-rw-r--r--instrumentation/afl-gcc-pass.so.cc968
-rw-r--r--instrumentation/afl-llvm-common.cc575
-rw-r--r--instrumentation/afl-llvm-common.h52
-rw-r--r--instrumentation/afl-llvm-dict2file.so.cc615
-rw-r--r--instrumentation/afl-llvm-lto-instrumentation.so.cc1060
-rw-r--r--instrumentation/afl-llvm-lto-instrumentlist.so.cc147
-rw-r--r--instrumentation/afl-llvm-pass.so.cc654
-rw-r--r--instrumentation/afl-llvm-rt-lto.o.c27
-rw-r--r--instrumentation/cmplog-instructions-pass.cc292
-rw-r--r--instrumentation/cmplog-routines-pass.cc213
-rw-r--r--instrumentation/compare-transform-pass.so.cc594
-rw-r--r--instrumentation/llvm-ngram-coverage.h18
-rw-r--r--instrumentation/split-compares-pass.so.cc1365
-rw-r--r--instrumentation/split-switches-pass.so.cc450
34 files changed, 14228 insertions, 0 deletions
diff --git a/instrumentation/COPYING3 b/instrumentation/COPYING3
new file mode 100644
index 00000000..94a9ed02
--- /dev/null
+++ b/instrumentation/COPYING3
@@ -0,0 +1,674 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+ The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users. We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors. You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+ To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights. Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received. You must make sure that they, too, receive
+or can get the source code. And you must show them these terms so they
+know their rights.
+
+ Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+ For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software. For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+ Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so. This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software. The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable. Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products. If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+ Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary. To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ TERMS AND CONDITIONS
+
+ 0. Definitions.
+
+ "This License" refers to version 3 of the GNU General Public License.
+
+ "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+ "The Program" refers to any copyrightable work licensed under this
+License. Each licensee is addressed as "you". "Licensees" and
+"recipients" may be individuals or organizations.
+
+ To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy. The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+ A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+ To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy. Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+ To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+ An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License. If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+ 1. Source Code.
+
+ The "source code" for a work means the preferred form of the work
+for making modifications to it. "Object code" means any non-source
+form of a work.
+
+ A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+ The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form. A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+ The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities. However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+ The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+ The Corresponding Source for a work in source code form is that
+same work.
+
+ 2. Basic Permissions.
+
+ All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met. This License explicitly affirms your unlimited
+permission to run the unmodified Program. The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work. This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+ You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force. You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright. Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+ Conveying under any other circumstances is permitted solely under
+the conditions stated below. Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+ No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+ When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+ 4. Conveying Verbatim Copies.
+
+ You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+ You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+ 5. Conveying Modified Source Versions.
+
+ You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+ a) The work must carry prominent notices stating that you modified
+ it, and giving a relevant date.
+
+ b) The work must carry prominent notices stating that it is
+ released under this License and any conditions added under section
+ 7. This requirement modifies the requirement in section 4 to
+ "keep intact all notices".
+
+ c) You must license the entire work, as a whole, under this
+ License to anyone who comes into possession of a copy. This
+ License will therefore apply, along with any applicable section 7
+ additional terms, to the whole of the work, and all its parts,
+ regardless of how they are packaged. This License gives no
+ permission to license the work in any other way, but it does not
+ invalidate such permission if you have separately received it.
+
+ d) If the work has interactive user interfaces, each must display
+ Appropriate Legal Notices; however, if the Program has interactive
+ interfaces that do not display Appropriate Legal Notices, your
+ work need not make them do so.
+
+ A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+ 6. Conveying Non-Source Forms.
+
+ You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+ a) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by the
+ Corresponding Source fixed on a durable physical medium
+ customarily used for software interchange.
+
+ b) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by a
+ written offer, valid for at least three years and valid for as
+ long as you offer spare parts or customer support for that product
+ model, to give anyone who possesses the object code either (1) a
+ copy of the Corresponding Source for all the software in the
+ product that is covered by this License, on a durable physical
+ medium customarily used for software interchange, for a price no
+ more than your reasonable cost of physically performing this
+ conveying of source, or (2) access to copy the
+ Corresponding Source from a network server at no charge.
+
+ c) Convey individual copies of the object code with a copy of the
+ written offer to provide the Corresponding Source. This
+ alternative is allowed only occasionally and noncommercially, and
+ only if you received the object code with such an offer, in accord
+ with subsection 6b.
+
+ d) Convey the object code by offering access from a designated
+ place (gratis or for a charge), and offer equivalent access to the
+ Corresponding Source in the same way through the same place at no
+ further charge. You need not require recipients to copy the
+ Corresponding Source along with the object code. If the place to
+ copy the object code is a network server, the Corresponding Source
+ may be on a different server (operated by you or a third party)
+ that supports equivalent copying facilities, provided you maintain
+ clear directions next to the object code saying where to find the
+ Corresponding Source. Regardless of what server hosts the
+ Corresponding Source, you remain obligated to ensure that it is
+ available for as long as needed to satisfy these requirements.
+
+ e) Convey the object code using peer-to-peer transmission, provided
+ you inform other peers where the object code and Corresponding
+ Source of the work are being offered to the general public at no
+ charge under subsection 6d.
+
+ A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+ A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling. In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage. For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product. A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+ "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source. The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+ If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information. But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+ The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed. Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+ Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+ 7. Additional Terms.
+
+ "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law. If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+ When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it. (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.) You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+ Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+ a) Disclaiming warranty or limiting liability differently from the
+ terms of sections 15 and 16 of this License; or
+
+ b) Requiring preservation of specified reasonable legal notices or
+ author attributions in that material or in the Appropriate Legal
+ Notices displayed by works containing it; or
+
+ c) Prohibiting misrepresentation of the origin of that material, or
+ requiring that modified versions of such material be marked in
+ reasonable ways as different from the original version; or
+
+ d) Limiting the use for publicity purposes of names of licensors or
+ authors of the material; or
+
+ e) Declining to grant rights under trademark law for use of some
+ trade names, trademarks, or service marks; or
+
+ f) Requiring indemnification of licensors and authors of that
+ material by anyone who conveys the material (or modified versions of
+ it) with contractual assumptions of liability to the recipient, for
+ any liability that these contractual assumptions directly impose on
+ those licensors and authors.
+
+ All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10. If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term. If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+ If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+ Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+ 8. Termination.
+
+ You may not propagate or modify a covered work except as expressly
+provided under this License. Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+ However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+ Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+ 9. Acceptance Not Required for Having Copies.
+
+ You are not required to accept this License in order to receive or
+run a copy of the Program. Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance. However,
+nothing other than this License grants you permission to propagate or
+modify any covered work. These actions infringe copyright if you do
+not accept this License. Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+ 10. Automatic Licensing of Downstream Recipients.
+
+ Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License. You are not responsible
+for enforcing compliance by third parties with this License.
+
+ An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+ You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License. For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+ 11. Patents.
+
+ A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The
+work thus licensed is called the contributor's "contributor version".
+
+ A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version. For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+ In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement). To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+ If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients. "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+ If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+ A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License. You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+ Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+ 12. No Surrender of Others' Freedom.
+
+ If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all. For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+ 13. Use with the GNU Affero General Public License.
+
+ Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work. The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+ 14. Revised Versions of this License.
+
+ The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation. If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+ If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+ Later license versions may give you additional or different
+permissions. However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+ 15. Disclaimer of Warranty.
+
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. Limitation of Liability.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+ 17. Interpretation of Sections 15 and 16.
+
+ If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+ If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+ <program> Copyright (C) <year> <name of author>
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+ You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<http://www.gnu.org/licenses/>.
+
+ The GNU General Public License does not permit incorporating your program
+into proprietary programs. If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License. But first, please read
+<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/instrumentation/LLVMInsTrim.so.cc b/instrumentation/LLVMInsTrim.so.cc
new file mode 100644
index 00000000..61a420ba
--- /dev/null
+++ b/instrumentation/LLVMInsTrim.so.cc
@@ -0,0 +1,598 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <unistd.h>
+
+#include "llvm/Config/llvm-config.h"
+#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 5
+typedef long double max_align_t;
+#endif
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#if LLVM_VERSION_MAJOR > 3 || \
+ (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4)
+ #include "llvm/IR/CFG.h"
+ #include "llvm/IR/Dominators.h"
+ #include "llvm/IR/DebugInfo.h"
+#else
+ #include "llvm/Support/CFG.h"
+ #include "llvm/Analysis/Dominators.h"
+ #include "llvm/DebugInfo.h"
+#endif
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/IR/BasicBlock.h"
+#include <unordered_set>
+#include <random>
+#include <list>
+#include <string>
+#include <fstream>
+
+#include "MarkNodes.h"
+#include "afl-llvm-common.h"
+#include "llvm-ngram-coverage.h"
+
+#include "config.h"
+#include "debug.h"
+
+using namespace llvm;
+
+static cl::opt<bool> MarkSetOpt("markset", cl::desc("MarkSet"),
+ cl::init(false));
+static cl::opt<bool> LoopHeadOpt("loophead", cl::desc("LoopHead"),
+ cl::init(false));
+
+namespace {
+
+struct InsTrim : public ModulePass {
+
+ protected:
+ uint32_t function_minimum_size = 1;
+ char * skip_nozero = NULL;
+
+ private:
+ std::mt19937 generator;
+ int total_instr = 0;
+
+ unsigned int genLabel() {
+
+ return generator() & (MAP_SIZE - 1);
+
+ }
+
+ public:
+ static char ID;
+
+ InsTrim() : ModulePass(ID), generator(0) {
+
+ initInstrumentList();
+
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+
+ AU.addRequired<DominatorTreeWrapperPass>();
+
+ }
+
+#if LLVM_VERSION_MAJOR < 4
+ const char *
+#else
+ StringRef
+#endif
+ getPassName() const override {
+
+ return "InstTrim Instrumentation";
+
+ }
+
+#if LLVM_VERSION_MAJOR > 4 || \
+ (LLVM_VERSION_MAJOR == 4 && LLVM_VERSION_PATCH >= 1)
+ #define AFL_HAVE_VECTOR_INTRINSICS 1
+#endif
+
+ bool runOnModule(Module &M) override {
+
+ setvbuf(stdout, NULL, _IONBF, 0);
+
+ if ((isatty(2) && !getenv("AFL_QUIET")) || getenv("AFL_DEBUG") != NULL) {
+
+ SAYF(cCYA "LLVMInsTrim" VERSION cRST " by csienslab\n");
+
+ } else
+
+ be_quiet = 1;
+
+ if (getenv("AFL_DEBUG") != NULL) debug = 1;
+
+ LLVMContext &C = M.getContext();
+
+ IntegerType *Int8Ty = IntegerType::getInt8Ty(C);
+ IntegerType *Int32Ty = IntegerType::getInt32Ty(C);
+
+#if LLVM_VERSION_MAJOR < 9
+ char *neverZero_counters_str;
+ if ((neverZero_counters_str = getenv("AFL_LLVM_NOT_ZERO")) != NULL)
+ if (!be_quiet) OKF("LLVM neverZero activated (by hexcoder)\n");
+#endif
+ skip_nozero = getenv("AFL_LLVM_SKIP_NEVERZERO");
+
+ if (getenv("AFL_LLVM_INSTRIM_LOOPHEAD") != NULL ||
+ getenv("LOOPHEAD") != NULL) {
+
+ LoopHeadOpt = true;
+
+ }
+
+ unsigned int PrevLocSize = 0;
+ char * ngram_size_str = getenv("AFL_LLVM_NGRAM_SIZE");
+ if (!ngram_size_str) ngram_size_str = getenv("AFL_NGRAM_SIZE");
+ char *ctx_str = getenv("AFL_LLVM_CTX");
+
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+ unsigned int ngram_size = 0;
+ /* Decide previous location vector size (must be a power of two) */
+ VectorType *PrevLocTy = NULL;
+
+ if (ngram_size_str)
+ if (sscanf(ngram_size_str, "%u", &ngram_size) != 1 || ngram_size < 2 ||
+ ngram_size > NGRAM_SIZE_MAX)
+ FATAL(
+ "Bad value of AFL_NGRAM_SIZE (must be between 2 and NGRAM_SIZE_MAX "
+ "(%u))",
+ NGRAM_SIZE_MAX);
+
+ if (ngram_size)
+ PrevLocSize = ngram_size - 1;
+ else
+#else
+ if (ngram_size_str)
+ #ifdef LLVM_VERSION_STRING
+ FATAL(
+ "Sorry, NGRAM branch coverage is not supported with llvm version %s!",
+ LLVM_VERSION_STRING);
+ #else
+ #ifndef LLVM_VERSION_PATCH
+ FATAL(
+ "Sorry, NGRAM branch coverage is not supported with llvm version "
+ "%d.%d.%d!",
+ LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, 0);
+ #else
+ FATAL(
+ "Sorry, NGRAM branch coverage is not supported with llvm version "
+ "%d.%d.%d!",
+          LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, LLVM_VERSION_PATCH);
+ #endif
+ #endif
+#endif
+ PrevLocSize = 1;
+
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+ // IntegerType *Int64Ty = IntegerType::getInt64Ty(C);
+ int PrevLocVecSize = PowerOf2Ceil(PrevLocSize);
+ IntegerType *IntLocTy =
+ IntegerType::getIntNTy(C, sizeof(PREV_LOC_T) * CHAR_BIT);
+ if (ngram_size)
+ PrevLocTy = VectorType::get(IntLocTy, PrevLocVecSize
+ #if LLVM_VERSION_MAJOR >= 12
+ ,
+ false
+ #endif
+ );
+#endif
+
+ /* Get globals for the SHM region and the previous location. Note that
+ __afl_prev_loc is thread-local. */
+
+ GlobalVariable *AFLMapPtr =
+ new GlobalVariable(M, PointerType::get(Int8Ty, 0), false,
+ GlobalValue::ExternalLinkage, 0, "__afl_area_ptr");
+ GlobalVariable *AFLPrevLoc;
+ GlobalVariable *AFLContext = NULL;
+ LoadInst * PrevCtx = NULL; // for CTX sensitive coverage
+
+ if (ctx_str)
+#ifdef __ANDROID__
+ AFLContext = new GlobalVariable(
+ M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_ctx");
+#else
+ AFLContext = new GlobalVariable(
+ M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_ctx",
+ 0, GlobalVariable::GeneralDynamicTLSModel, 0, false);
+#endif
+
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+ if (ngram_size)
+ #ifdef __ANDROID__
+ AFLPrevLoc = new GlobalVariable(
+ M, PrevLocTy, /* isConstant */ false, GlobalValue::ExternalLinkage,
+ /* Initializer */ nullptr, "__afl_prev_loc");
+ #else
+ AFLPrevLoc = new GlobalVariable(
+ M, PrevLocTy, /* isConstant */ false, GlobalValue::ExternalLinkage,
+ /* Initializer */ nullptr, "__afl_prev_loc",
+ /* InsertBefore */ nullptr, GlobalVariable::GeneralDynamicTLSModel,
+ /* AddressSpace */ 0, /* IsExternallyInitialized */ false);
+ #endif
+ else
+#endif
+#ifdef __ANDROID__
+ AFLPrevLoc = new GlobalVariable(
+ M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_loc");
+#else
+ AFLPrevLoc = new GlobalVariable(
+ M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_loc", 0,
+ GlobalVariable::GeneralDynamicTLSModel, 0, false);
+#endif
+
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+ /* Create the vector shuffle mask for updating the previous block history.
+ Note that the first element of the vector will store cur_loc, so just set
+ it to undef to allow the optimizer to do its thing. */
+
+ SmallVector<Constant *, 32> PrevLocShuffle = {UndefValue::get(Int32Ty)};
+
+ for (unsigned I = 0; I < PrevLocSize - 1; ++I)
+ PrevLocShuffle.push_back(ConstantInt::get(Int32Ty, I));
+
+ for (int I = PrevLocSize; I < PrevLocVecSize; ++I)
+ PrevLocShuffle.push_back(ConstantInt::get(Int32Ty, PrevLocSize));
+
+ Constant *PrevLocShuffleMask = ConstantVector::get(PrevLocShuffle);
+#endif
+
+ // this is our default
+ MarkSetOpt = true;
+
+ ConstantInt *Zero = ConstantInt::get(Int8Ty, 0);
+ ConstantInt *One = ConstantInt::get(Int8Ty, 1);
+
+ u64 total_rs = 0;
+ u64 total_hs = 0;
+
+ scanForDangerousFunctions(&M);
+
+ for (Function &F : M) {
+
+ if (debug) {
+
+ uint32_t bb_cnt = 0;
+
+ for (auto &BB : F)
+ if (BB.size() > 0) ++bb_cnt;
+ SAYF(cMGN "[D] " cRST "Function %s size %zu %u\n",
+ F.getName().str().c_str(), F.size(), bb_cnt);
+
+ }
+
+ if (!isInInstrumentList(&F)) continue;
+
+ // if the function below our minimum size skip it (1 or 2)
+ if (F.size() < function_minimum_size) { continue; }
+
+ std::unordered_set<BasicBlock *> MS;
+ if (!MarkSetOpt) {
+
+ for (auto &BB : F) {
+
+ MS.insert(&BB);
+
+ }
+
+ total_rs += F.size();
+
+ } else {
+
+ auto Result = markNodes(&F);
+ auto RS = Result.first;
+ auto HS = Result.second;
+
+ MS.insert(RS.begin(), RS.end());
+ if (!LoopHeadOpt) {
+
+ MS.insert(HS.begin(), HS.end());
+ total_rs += MS.size();
+
+ } else {
+
+ DenseSet<std::pair<BasicBlock *, BasicBlock *>> EdgeSet;
+ DominatorTreeWrapperPass * DTWP =
+ &getAnalysis<DominatorTreeWrapperPass>(F);
+ auto DT = &DTWP->getDomTree();
+
+ total_rs += RS.size();
+ total_hs += HS.size();
+
+ for (BasicBlock *BB : HS) {
+
+ bool Inserted = false;
+ for (auto BI = pred_begin(BB), BE = pred_end(BB); BI != BE; ++BI) {
+
+ auto Edge = BasicBlockEdge(*BI, BB);
+ if (Edge.isSingleEdge() && DT->dominates(Edge, BB)) {
+
+ EdgeSet.insert({*BI, BB});
+ Inserted = true;
+ break;
+
+ }
+
+ }
+
+ if (!Inserted) {
+
+ MS.insert(BB);
+ total_rs += 1;
+ total_hs -= 1;
+
+ }
+
+ }
+
+ for (auto I = EdgeSet.begin(), E = EdgeSet.end(); I != E; ++I) {
+
+ auto PredBB = I->first;
+ auto SuccBB = I->second;
+ auto NewBB =
+ SplitBlockPredecessors(SuccBB, {PredBB}, ".split", DT, nullptr,
+#if LLVM_VERSION_MAJOR >= 8
+ nullptr,
+#endif
+ false);
+ MS.insert(NewBB);
+
+ }
+
+ }
+
+ for (BasicBlock &BB : F) {
+
+ if (MS.find(&BB) == MS.end()) { continue; }
+ IRBuilder<> IRB(&*BB.getFirstInsertionPt());
+
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+ if (ngram_size) {
+
+ LoadInst *PrevLoc = IRB.CreateLoad(AFLPrevLoc);
+ PrevLoc->setMetadata(M.getMDKindID("nosanitize"),
+ MDNode::get(C, None));
+
+ Value *ShuffledPrevLoc = IRB.CreateShuffleVector(
+ PrevLoc, UndefValue::get(PrevLocTy), PrevLocShuffleMask);
+ Value *UpdatedPrevLoc = IRB.CreateInsertElement(
+ ShuffledPrevLoc, ConstantInt::get(Int32Ty, genLabel()),
+ (uint64_t)0);
+
+ IRB.CreateStore(UpdatedPrevLoc, AFLPrevLoc)
+ ->setMetadata(M.getMDKindID("nosanitize"),
+ MDNode::get(C, None));
+
+ } else
+
+#endif
+ {
+
+ IRB.CreateStore(ConstantInt::get(Int32Ty, genLabel()), AFLPrevLoc);
+
+ }
+
+ }
+
+ }
+
+ int has_calls = 0;
+ for (BasicBlock &BB : F) {
+
+ auto PI = pred_begin(&BB);
+ auto PE = pred_end(&BB);
+ IRBuilder<> IRB(&*BB.getFirstInsertionPt());
+ Value * L = NULL;
+ unsigned int cur_loc;
+
+ // Context sensitive coverage
+ if (ctx_str && &BB == &F.getEntryBlock()) {
+
+ PrevCtx = IRB.CreateLoad(AFLContext);
+ PrevCtx->setMetadata(M.getMDKindID("nosanitize"),
+ MDNode::get(C, None));
+
+ // does the function have calls? and is any of the calls larger than
+ // one basic block?
+ has_calls = 0;
+ for (auto &BB : F) {
+
+ if (has_calls) break;
+ for (auto &IN : BB) {
+
+ CallInst *callInst = nullptr;
+ if ((callInst = dyn_cast<CallInst>(&IN))) {
+
+ Function *Callee = callInst->getCalledFunction();
+ if (!Callee || Callee->size() < function_minimum_size)
+ continue;
+ else {
+
+ has_calls = 1;
+ break;
+
+ }
+
+ }
+
+ }
+
+ }
+
+ // if yes we store a context ID for this function in the global var
+ if (has_calls) {
+
+ ConstantInt *NewCtx = ConstantInt::get(Int32Ty, genLabel());
+ StoreInst * StoreCtx = IRB.CreateStore(NewCtx, AFLContext);
+ StoreCtx->setMetadata(M.getMDKindID("nosanitize"),
+ MDNode::get(C, None));
+
+ }
+
+ } // END of ctx_str
+
+ if (MarkSetOpt && MS.find(&BB) == MS.end()) { continue; }
+
+ if (PI == PE) {
+
+ cur_loc = genLabel();
+ L = ConstantInt::get(Int32Ty, cur_loc);
+
+ } else {
+
+ auto *PN = PHINode::Create(Int32Ty, 0, "", &*BB.begin());
+ DenseMap<BasicBlock *, unsigned> PredMap;
+ for (auto PI = pred_begin(&BB), PE = pred_end(&BB); PI != PE; ++PI) {
+
+ BasicBlock *PBB = *PI;
+ auto It = PredMap.insert({PBB, genLabel()});
+ unsigned Label = It.first->second;
+ cur_loc = Label;
+ PN->addIncoming(ConstantInt::get(Int32Ty, Label), PBB);
+
+ }
+
+ L = PN;
+
+ }
+
+ /* Load prev_loc */
+ LoadInst *PrevLoc = IRB.CreateLoad(AFLPrevLoc);
+ PrevLoc->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
+ Value *PrevLocTrans;
+
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+ /* "For efficiency, we propose to hash the tuple as a key into the
+ hit_count map as (prev_block_trans << 1) ^ curr_block_trans, where
+ prev_block_trans = (block_trans_1 ^ ... ^ block_trans_(n-1)" */
+
+ if (ngram_size)
+ PrevLocTrans =
+ IRB.CreateZExt(IRB.CreateXorReduce(PrevLoc), IRB.getInt32Ty());
+ else
+#endif
+ PrevLocTrans = IRB.CreateZExt(PrevLoc, IRB.getInt32Ty());
+
+ if (ctx_str)
+ PrevLocTrans =
+ IRB.CreateZExt(IRB.CreateXor(PrevLocTrans, PrevCtx), Int32Ty);
+
+ /* Load SHM pointer */
+ LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr);
+ MapPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
+ Value *MapPtrIdx;
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+ if (ngram_size)
+ MapPtrIdx = IRB.CreateGEP(
+ MapPtr, IRB.CreateZExt(IRB.CreateXor(PrevLocTrans, L), Int32Ty));
+ else
+#endif
+ MapPtrIdx = IRB.CreateGEP(MapPtr, IRB.CreateXor(PrevLocTrans, L));
+
+ /* Update bitmap */
+ LoadInst *Counter = IRB.CreateLoad(MapPtrIdx);
+ Counter->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
+
+ Value *Incr = IRB.CreateAdd(Counter, One);
+
+#if LLVM_VERSION_MAJOR < 9
+ if (neverZero_counters_str !=
+ NULL) // with llvm 9 we make this the default as the bug in llvm is
+ // then fixed
+#else
+ if (!skip_nozero)
+#endif
+ {
+
+ /* hexcoder: Realize a counter that skips zero during overflow.
+ * Once this counter reaches its maximum value, it next increments to
+ * 1
+ *
+ * Instead of
+ * Counter + 1 -> Counter
+ * we inject now this
+ * Counter + 1 -> {Counter, OverflowFlag}
+ * Counter + OverflowFlag -> Counter
+ */
+ auto cf = IRB.CreateICmpEQ(Incr, Zero);
+ auto carry = IRB.CreateZExt(cf, Int8Ty);
+ Incr = IRB.CreateAdd(Incr, carry);
+
+ }
+
+ IRB.CreateStore(Incr, MapPtrIdx)
+ ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
+
+ if (ctx_str && has_calls) {
+
+ // in CTX mode we have to restore the original context for the
+ // caller - she might be calling other functions which need the
+ // correct CTX
+ Instruction *Inst = BB.getTerminator();
+ if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst)) {
+
+ IRBuilder<> Post_IRB(Inst);
+ StoreInst * RestoreCtx = Post_IRB.CreateStore(PrevCtx, AFLContext);
+ RestoreCtx->setMetadata(M.getMDKindID("nosanitize"),
+ MDNode::get(C, None));
+
+ }
+
+ }
+
+ total_instr++;
+
+ }
+
+ }
+
+ if (!be_quiet) {
+
+ char modeline[100];
+ snprintf(modeline, sizeof(modeline), "%s%s%s%s%s",
+ getenv("AFL_HARDEN") ? "hardened" : "non-hardened",
+ getenv("AFL_USE_ASAN") ? ", ASAN" : "",
+ getenv("AFL_USE_MSAN") ? ", MSAN" : "",
+ getenv("AFL_USE_CFISAN") ? ", CFISAN" : "",
+ getenv("AFL_USE_UBSAN") ? ", UBSAN" : "");
+
+ OKF("Instrumented %u locations (%llu, %llu) (%s mode)\n", total_instr,
+ total_rs, total_hs, modeline);
+
+ }
+
+ return false;
+
+ }
+
+}; // end of struct InsTrim
+
+} // end of anonymous namespace
+
+char InsTrim::ID = 0;
+
+static void registerAFLPass(const PassManagerBuilder &,
+ legacy::PassManagerBase &PM) {
+
+ PM.add(new InsTrim());
+
+}
+
+static RegisterStandardPasses RegisterAFLPass(
+ PassManagerBuilder::EP_OptimizerLast, registerAFLPass);
+
+static RegisterStandardPasses RegisterAFLPass0(
+ PassManagerBuilder::EP_EnabledOnOptLevel0, registerAFLPass);
+
diff --git a/instrumentation/MarkNodes.cc b/instrumentation/MarkNodes.cc
new file mode 100644
index 00000000..20a7df35
--- /dev/null
+++ b/instrumentation/MarkNodes.cc
@@ -0,0 +1,481 @@
+#include <algorithm>
+#include <map>
+#include <queue>
+#include <set>
+#include <vector>
+
+#include "llvm/Config/llvm-config.h"
+#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 5
+typedef long double max_align_t;
+#endif
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/BasicBlock.h"
+#if LLVM_VERSION_MAJOR > 3 || \
+ (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4)
+ #include "llvm/IR/CFG.h"
+#else
+ #include "llvm/Support/CFG.h"
+#endif
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+DenseMap<BasicBlock *, uint32_t> LMap;
+std::vector<BasicBlock *> Blocks;
+std::set<uint32_t> Marked, Markabove;
+std::vector<std::vector<uint32_t> > Succs, Preds;
+
+void reset() {
+
+ LMap.clear();
+ Blocks.clear();
+ Marked.clear();
+ Markabove.clear();
+
+}
+
+uint32_t start_point;
+
+void labelEachBlock(Function *F) {
+
+ // Fake single endpoint;
+ LMap[NULL] = Blocks.size();
+ Blocks.push_back(NULL);
+
+ // Assign the unique LabelID to each block;
+ for (auto I = F->begin(), E = F->end(); I != E; ++I) {
+
+ BasicBlock *BB = &*I;
+ LMap[BB] = Blocks.size();
+ Blocks.push_back(BB);
+
+ }
+
+ start_point = LMap[&F->getEntryBlock()];
+
+}
+
+void buildCFG(Function *F) {
+
+ Succs.resize(Blocks.size());
+ Preds.resize(Blocks.size());
+ for (size_t i = 0; i < Succs.size(); i++) {
+
+ Succs[i].clear();
+ Preds[i].clear();
+
+ }
+
+ for (auto S = F->begin(), E = F->end(); S != E; ++S) {
+
+ BasicBlock *BB = &*S;
+ uint32_t MyID = LMap[BB];
+
+ for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+
+ Succs[MyID].push_back(LMap[*I]);
+
+ }
+
+ }
+
+}
+
+std::vector<std::vector<uint32_t> > tSuccs;
+std::vector<bool> tag, indfs;
+
+void DFStree(size_t now_id) {
+
+ if (tag[now_id]) return;
+ tag[now_id] = true;
+ indfs[now_id] = true;
+ for (auto succ : tSuccs[now_id]) {
+
+ if (tag[succ] and indfs[succ]) {
+
+ Marked.insert(succ);
+ Markabove.insert(succ);
+ continue;
+
+ }
+
+ Succs[now_id].push_back(succ);
+ Preds[succ].push_back(now_id);
+ DFStree(succ);
+
+ }
+
+ indfs[now_id] = false;
+
+}
+
+void turnCFGintoDAG() {
+
+ tSuccs = Succs;
+ tag.resize(Blocks.size());
+ indfs.resize(Blocks.size());
+ for (size_t i = 0; i < Blocks.size(); ++i) {
+
+ Succs[i].clear();
+ tag[i] = false;
+ indfs[i] = false;
+
+ }
+
+ DFStree(start_point);
+ for (size_t i = 0; i < Blocks.size(); ++i)
+ if (Succs[i].empty()) {
+
+ Succs[i].push_back(0);
+ Preds[0].push_back(i);
+
+ }
+
+}
+
+uint32_t timeStamp;
+namespace DominatorTree {
+
+std::vector<std::vector<uint32_t> > cov;
+std::vector<uint32_t> dfn, nfd, par, sdom, idom, mom, mn;
+
+bool Compare(uint32_t u, uint32_t v) {
+
+ return dfn[u] < dfn[v];
+
+}
+
+uint32_t eval(uint32_t u) {
+
+ if (mom[u] == u) return u;
+ uint32_t res = eval(mom[u]);
+ if (Compare(sdom[mn[mom[u]]], sdom[mn[u]])) { mn[u] = mn[mom[u]]; }
+ return mom[u] = res;
+
+}
+
+void DFS(uint32_t now) {
+
+ timeStamp += 1;
+ dfn[now] = timeStamp;
+ nfd[timeStamp - 1] = now;
+ for (auto succ : Succs[now]) {
+
+ if (dfn[succ] == 0) {
+
+ par[succ] = now;
+ DFS(succ);
+
+ }
+
+ }
+
+}
+
+void DominatorTree() {
+
+ if (Blocks.empty()) return;
+ uint32_t s = start_point;
+
+ // Initialization
+ mn.resize(Blocks.size());
+ cov.resize(Blocks.size());
+ dfn.resize(Blocks.size());
+ nfd.resize(Blocks.size());
+ par.resize(Blocks.size());
+ mom.resize(Blocks.size());
+ sdom.resize(Blocks.size());
+ idom.resize(Blocks.size());
+
+ for (uint32_t i = 0; i < Blocks.size(); i++) {
+
+ dfn[i] = 0;
+ nfd[i] = Blocks.size();
+ cov[i].clear();
+ idom[i] = mom[i] = mn[i] = sdom[i] = i;
+
+ }
+
+ timeStamp = 0;
+ DFS(s);
+
+ for (uint32_t i = Blocks.size() - 1; i >= 1u; i--) {
+
+ uint32_t now = nfd[i];
+ if (now == Blocks.size()) { continue; }
+ for (uint32_t pre : Preds[now]) {
+
+ if (dfn[pre]) {
+
+ eval(pre);
+ if (Compare(sdom[mn[pre]], sdom[now])) { sdom[now] = sdom[mn[pre]]; }
+
+ }
+
+ }
+
+ cov[sdom[now]].push_back(now);
+ mom[now] = par[now];
+ for (uint32_t x : cov[par[now]]) {
+
+ eval(x);
+ if (Compare(sdom[mn[x]], par[now])) {
+
+ idom[x] = mn[x];
+
+ } else {
+
+ idom[x] = par[now];
+
+ }
+
+ }
+
+ }
+
+ for (uint32_t i = 1; i < Blocks.size(); i += 1) {
+
+ uint32_t now = nfd[i];
+ if (now == Blocks.size()) { continue; }
+ if (idom[now] != sdom[now]) idom[now] = idom[idom[now]];
+
+ }
+
+}
+
+} // namespace DominatorTree
+
+std::vector<uint32_t> Visited, InStack;
+std::vector<uint32_t> TopoOrder, InDeg;
+std::vector<std::vector<uint32_t> > t_Succ, t_Pred;
+
+void Go(uint32_t now, uint32_t tt) {
+
+ if (now == tt) return;
+ Visited[now] = InStack[now] = timeStamp;
+
+ for (uint32_t nxt : Succs[now]) {
+
+ if (Visited[nxt] == timeStamp and InStack[nxt] == timeStamp) {
+
+ Marked.insert(nxt);
+
+ }
+
+ t_Succ[now].push_back(nxt);
+ t_Pred[nxt].push_back(now);
+ InDeg[nxt] += 1;
+ if (Visited[nxt] == timeStamp) { continue; }
+ Go(nxt, tt);
+
+ }
+
+ InStack[now] = 0;
+
+}
+
+void TopologicalSort(uint32_t ss, uint32_t tt) {
+
+ timeStamp += 1;
+
+ Go(ss, tt);
+
+ TopoOrder.clear();
+ std::queue<uint32_t> wait;
+ wait.push(ss);
+ while (not wait.empty()) {
+
+ uint32_t now = wait.front();
+ wait.pop();
+ TopoOrder.push_back(now);
+ for (uint32_t nxt : t_Succ[now]) {
+
+ InDeg[nxt] -= 1;
+ if (InDeg[nxt] == 0u) { wait.push(nxt); }
+
+ }
+
+ }
+
+}
+
+std::vector<std::set<uint32_t> > NextMarked;
+bool Indistinguish(uint32_t node1, uint32_t node2) {
+
+ if (NextMarked[node1].size() > NextMarked[node2].size()) {
+
+ uint32_t _swap = node1;
+ node1 = node2;
+ node2 = _swap;
+
+ }
+
+ for (uint32_t x : NextMarked[node1]) {
+
+ if (NextMarked[node2].find(x) != NextMarked[node2].end()) { return true; }
+
+ }
+
+ return false;
+
+}
+
+void MakeUniq(uint32_t now) {
+
+ bool StopFlag = false;
+ if (Marked.find(now) == Marked.end()) {
+
+ for (uint32_t pred1 : t_Pred[now]) {
+
+ for (uint32_t pred2 : t_Pred[now]) {
+
+ if (pred1 == pred2) continue;
+ if (Indistinguish(pred1, pred2)) {
+
+ Marked.insert(now);
+ StopFlag = true;
+ break;
+
+ }
+
+ }
+
+ if (StopFlag) { break; }
+
+ }
+
+ }
+
+ if (Marked.find(now) != Marked.end()) {
+
+ NextMarked[now].insert(now);
+
+ } else {
+
+ for (uint32_t pred : t_Pred[now]) {
+
+ for (uint32_t x : NextMarked[pred]) {
+
+ NextMarked[now].insert(x);
+
+ }
+
+ }
+
+ }
+
+}
+
+bool MarkSubGraph(uint32_t ss, uint32_t tt) {
+
+ TopologicalSort(ss, tt);
+ if (TopoOrder.empty()) return false;
+
+ for (uint32_t i : TopoOrder) {
+
+ NextMarked[i].clear();
+
+ }
+
+ NextMarked[TopoOrder[0]].insert(TopoOrder[0]);
+ for (uint32_t i = 1; i < TopoOrder.size(); i += 1) {
+
+ MakeUniq(TopoOrder[i]);
+
+ }
+
+ // Check if there is an empty path.
+ if (NextMarked[tt].count(TopoOrder[0]) > 0) return true;
+ return false;
+
+}
+
+void MarkVertice() {
+
+ uint32_t s = start_point;
+
+ InDeg.resize(Blocks.size());
+ Visited.resize(Blocks.size());
+ InStack.resize(Blocks.size());
+ t_Succ.resize(Blocks.size());
+ t_Pred.resize(Blocks.size());
+ NextMarked.resize(Blocks.size());
+
+ for (uint32_t i = 0; i < Blocks.size(); i += 1) {
+
+ Visited[i] = InStack[i] = InDeg[i] = 0;
+ t_Succ[i].clear();
+ t_Pred[i].clear();
+
+ }
+
+ timeStamp = 0;
+ uint32_t t = 0;
+ bool emptyPathExists = true;
+
+ while (s != t) {
+
+ emptyPathExists &= MarkSubGraph(DominatorTree::idom[t], t);
+ t = DominatorTree::idom[t];
+
+ }
+
+ if (emptyPathExists) {
+
+ // Mark all exit blocks to catch the empty path.
+ Marked.insert(t_Pred[0].begin(), t_Pred[0].end());
+
+ }
+
+}
+
+// return {marked nodes}
+std::pair<std::vector<BasicBlock *>, std::vector<BasicBlock *> > markNodes(
+ Function *F) {
+
+ assert(F->size() > 0 && "Function can not be empty");
+
+ reset();
+ labelEachBlock(F);
+ buildCFG(F);
+ turnCFGintoDAG();
+ DominatorTree::DominatorTree();
+ MarkVertice();
+
+ std::vector<BasicBlock *> Result, ResultAbove;
+ for (uint32_t x : Markabove) {
+
+ auto it = Marked.find(x);
+ if (it != Marked.end()) Marked.erase(it);
+ if (x) ResultAbove.push_back(Blocks[x]);
+
+ }
+
+ for (uint32_t x : Marked) {
+
+ if (x == 0) {
+
+ continue;
+
+ } else {
+
+ Result.push_back(Blocks[x]);
+
+ }
+
+ }
+
+ return {Result, ResultAbove};
+
+}
+
diff --git a/instrumentation/MarkNodes.h b/instrumentation/MarkNodes.h
new file mode 100644
index 00000000..8ddc978d
--- /dev/null
+++ b/instrumentation/MarkNodes.h
@@ -0,0 +1,12 @@
+#ifndef __MARK_NODES__
+#define __MARK_NODES__
+
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Function.h"
+#include <vector>
+
+std::pair<std::vector<llvm::BasicBlock *>, std::vector<llvm::BasicBlock *>>
+markNodes(llvm::Function *F);
+
+#endif
+
diff --git a/instrumentation/README.cmplog.md b/instrumentation/README.cmplog.md
new file mode 100644
index 00000000..5f855e1f
--- /dev/null
+++ b/instrumentation/README.cmplog.md
@@ -0,0 +1,42 @@
+# CmpLog instrumentation
+
+The CmpLog instrumentation enables the logging of comparison operands in a
+shared memory region.
+
+These values can be used by various mutators built on top of it.
+At the moment we support the RedQueen mutator (input-2-state instructions only).
+
+## Build
+
+To use CmpLog, you have to build two versions of the instrumented target
+program.
+
+The first version is built using the regular AFL++ instrumentation.
+
+The second one, the CmpLog binary, with setting AFL_LLVM_CMPLOG during the compilation.
+
+For example:
+
+```
+./configure --cc=~/path/to/afl-clang-fast
+make
+cp ./program ./program.afl
+make clean
+export AFL_LLVM_CMPLOG=1
+./configure --cc=~/path/to/afl-clang-fast
+make
+cp ./program ./program.cmplog
+```
+
+## Use
+
+AFL++ has the new -c option that needs to be used to specify the CmpLog binary (the second
+build).
+
+For example:
+
+```
+afl-fuzz -i input -o output -c ./program.cmplog -m none -- ./program.afl @@
+```
+
+Be sure to use `-m none` because CmpLog can map a lot of pages.
diff --git a/instrumentation/README.ctx.md b/instrumentation/README.ctx.md
new file mode 100644
index 00000000..caf2c09a
--- /dev/null
+++ b/instrumentation/README.ctx.md
@@ -0,0 +1,22 @@
+# AFL Context Sensitive Branch Coverage
+
+## What is this?
+
+This is an LLVM-based implementation of the context sensitive branch coverage.
+
+Basically every function gets its own ID and that ID is combined with the
+edges of the called functions.
+
+So if both function A and function B call a function C, the coverage
+collected in C will be different.
+
+In math the coverage is collected as follows:
+`map[current_location_ID ^ previous_location_ID >> 1 ^ previous_callee_ID] += 1`
+
+## Usage
+
+Set the `AFL_LLVM_INSTRUMENT=CTX` or `AFL_LLVM_CTX=1` environment variable.
+
+It is highly recommended to increase the MAP_SIZE_POW2 definition in
+config.h to at least 18 and maybe up to 20 for this as otherwise too
+many map collisions occur.
diff --git a/instrumentation/README.gcc_plugin.md b/instrumentation/README.gcc_plugin.md
new file mode 100644
index 00000000..919801d1
--- /dev/null
+++ b/instrumentation/README.gcc_plugin.md
@@ -0,0 +1,164 @@
+# GCC-based instrumentation for afl-fuzz
+
+See [../README.md](../README.md) for the general instruction manual.
+See [README.llvm.md](README.llvm.md) for the LLVM-based instrumentation.
+
+TLDR:
+ * `apt-get install gcc-VERSION-plugin-dev`
+ * `make`
+ * gcc and g++ must point to the gcc-VERSION you want to use, or you have to
+   set AFL_CC/AFL_CXX to point to these!
+ * just use afl-gcc-fast/afl-g++-fast normally like you would afl-clang-fast
+
+## 1) Introduction
+
+The code in this directory allows you to instrument programs for AFL using
+true compiler-level instrumentation, instead of the more crude
+assembly-level rewriting approach taken by afl-gcc and afl-clang. This has
+several interesting properties:
+
+ - The compiler can make many optimizations that are hard to pull off when
+ manually inserting assembly. As a result, some slow, CPU-bound programs will
+   run up to around 2x faster.
+
+ The gains are less pronounced for fast binaries, where the speed is limited
+ chiefly by the cost of creating new processes. In such cases, the gain will
+ probably stay within 10%.
+
+ - The instrumentation is CPU-independent. At least in principle, you should
+ be able to rely on it to fuzz programs on non-x86 architectures (after
+   building afl-fuzz with AFL_NO_X86=1).
+
+ - Because the feature relies on the internals of GCC, it is gcc-specific
+ and will *not* work with LLVM (see ../llvm_mode for an alternative).
+
+Once this implementation is shown to be sufficiently robust and portable, it
+will probably replace afl-gcc. For now, it can be built separately and
+co-exists with the original code.
+
+The idea and much of the implementation comes from Laszlo Szekeres.
+
+## 2) How to use
+
+In order to leverage this mechanism, you need to have modern enough GCC
+(>= version 4.5.0) and the plugin headers installed on your system. That
+should be all you need. On Debian machines, these headers can be acquired by
+installing the `gcc-VERSION-plugin-dev` packages.
+
+To build the instrumentation itself, type 'make'. This will generate binaries
+called afl-gcc-fast and afl-g++-fast in the parent directory.
+
+The gcc and g++ compiler links have to point to gcc-VERSION - or set these
+by pointing the environment variables AFL_CC/AFL_CXX to them.
+If the CC/CXX have been overridden, those compilers will be used from
+those wrappers without using AFL_CXX/AFL_CC settings.
+
+Once this is done, you can instrument third-party code in a way similar to the
+standard operating mode of AFL, e.g.:
+
+ CC=/path/to/afl/afl-gcc-fast ./configure [...options...]
+ make
+
+Be sure to also include CXX set to afl-g++-fast for C++ code.
+
+The tool honors roughly the same environmental variables as afl-gcc (see
+[env_variables.md](../docs/env_variables.md)). This includes AFL_INST_RATIO,
+AFL_USE_ASAN, AFL_HARDEN, and AFL_DONT_OPTIMIZE.
+
+Note: if you want the GCC plugin to be installed on your system for all
+users, you need to build it before issuing 'make install' in the parent
+directory.
+
+## 3) Gotchas, feedback, bugs
+
+This is an early-stage mechanism, so field reports are welcome. You can send bug
+reports to afl@aflplus.plus
+
+## 4) Bonus feature #1: deferred initialization
+
+AFL tries to optimize performance by executing the targeted binary just once,
+stopping it just before main(), and then cloning this "main" process to get
+a steady supply of targets to fuzz.
+
+Although this approach eliminates much of the OS-, linker- and libc-level
+costs of executing the program, it does not always help with binaries that
+perform other time-consuming initialization steps - say, parsing a large config
+file before getting to the fuzzed data.
+
+In such cases, it's beneficial to initialize the forkserver a bit later, once
+most of the initialization work is already done, but before the binary attempts
+to read the fuzzed input and parse it; in some cases, this can offer a 10x+
+performance gain. You can implement delayed initialization in LLVM mode in a
+fairly simple way.
+
+First, locate a suitable location in the code where the delayed cloning can
+take place. This needs to be done with *extreme* care to avoid breaking the
+binary. In particular, the program will probably malfunction if you select
+a location after:
+
+ - The creation of any vital threads or child processes - since the forkserver
+ can't clone them easily.
+
+ - The initialization of timers via setitimer() or equivalent calls.
+
+ - The creation of temporary files, network sockets, offset-sensitive file
+ descriptors, and similar shared-state resources - but only provided that
+ their state meaningfully influences the behavior of the program later on.
+
+ - Any access to the fuzzed input, including reading the metadata about its
+ size.
+
+With the location selected, add this code in the appropriate spot:
+
+```
+#ifdef __AFL_HAVE_MANUAL_CONTROL
+ __AFL_INIT();
+#endif
+```
+
+You don't need the #ifdef guards, but they will make the program still work as
+usual when compiled with a tool other than afl-gcc-fast/afl-clang-fast.
+
+Finally, recompile the program with afl-gcc-fast (afl-gcc or afl-clang will
+*not* generate a deferred-initialization binary) - and you should be all set!
+
+## 5) Bonus feature #2: persistent mode
+
+Some libraries provide APIs that are stateless, or whose state can be reset in
+between processing different input files. When such a reset is performed, a
+single long-lived process can be reused to try out multiple test cases,
+eliminating the need for repeated fork() calls and the associated OS overhead.
+
+The basic structure of the program that does this would be:
+
+```
+ while (__AFL_LOOP(1000)) {
+
+ /* Read input data. */
+ /* Call library code to be fuzzed. */
+ /* Reset state. */
+
+ }
+
+ /* Exit normally */
+```
+
+The numerical value specified within the loop controls the maximum number
+of iterations before AFL will restart the process from scratch. This minimizes
+the impact of memory leaks and similar glitches; 1000 is a good starting point.
+
+A more detailed template is shown in ../examples/persistent_demo/.
+Similarly to the previous mode, the feature works only with afl-gcc-fast or
+afl-clang-fast; #ifdef guards can be used to suppress it when using other
+compilers.
+
+Note that as with the previous mode, the feature is easy to misuse; if you
+do not reset the critical state fully, you may end up with false positives or
+waste a whole lot of CPU power doing nothing useful at all. Be particularly
+wary of memory leaks and the state of file descriptors.
+
+When running in this mode, the execution paths will inherently vary a bit
+depending on whether the input loop is being entered for the first time or
+executed again. To avoid spurious warnings, the feature implies
+AFL_NO_VAR_CHECK and hides the "variable path" warnings in the UI.
+
diff --git a/instrumentation/README.instrim.md b/instrumentation/README.instrim.md
new file mode 100644
index 00000000..99f6477a
--- /dev/null
+++ b/instrumentation/README.instrim.md
@@ -0,0 +1,30 @@
+# InsTrim
+
+InsTrim: Lightweight Instrumentation for Coverage-guided Fuzzing
+
+## Introduction
+
+InsTrim is the work of Chin-Chia Hsu, Che-Yu Wu, Hsu-Chun Hsiao and Shih-Kun Huang.
+
+It uses a CFG (control flow graph) and markers to instrument just what
+is necessary in the binary (ie less than llvm_mode). As a result the binary is
+about 10-15% faster compared to normal llvm_mode however with some coverage loss.
+It requires at least llvm version 3.8.0 to build.
+If you have LLVM 7+ we recommend PCGUARD instead.
+
+## Usage
+
+Set the environment variable `AFL_LLVM_INSTRUMENT=CFG` or `AFL_LLVM_INSTRIM=1`
+during compilation of the target.
+
+There is also special mode which instruments loops in a way so that
+afl-fuzz can see which loop path has been selected but not being able to
+see how often the loop has been rerun.
+This again is a tradeoff for speed for less path information.
+To enable this mode set `AFL_LLVM_INSTRIM_LOOPHEAD=1`.
+
+## Background
+
+The paper from Chin-Chia Hsu, Che-Yu Wu, Hsu-Chun Hsiao and Shih-Kun Huang:
+[InsTrim: Lightweight Instrumentation for Coverage-guided Fuzzing]
+(https://www.ndss-symposium.org/wp-content/uploads/2018/07/bar2018_14_Hsu_paper.pdf)
diff --git a/instrumentation/README.instrument_list.md b/instrumentation/README.instrument_list.md
new file mode 100644
index 00000000..122be2b6
--- /dev/null
+++ b/instrumentation/README.instrument_list.md
@@ -0,0 +1,96 @@
+# Using afl++ with partial instrumentation
+
+ This file describes how to selectively instrument only source files
+ or functions that are of interest to you using the LLVM and GCC_PLUGIN
+ instrumentation provided by afl++.
+
+## 1) Description and purpose
+
+When building and testing complex programs where only a part of the program is
+the fuzzing target, it often helps to only instrument the necessary parts of
+the program, leaving the rest uninstrumented. This helps to focus the fuzzer
+on the important parts of the program, avoiding undesired noise and
+disturbance by uninteresting code being exercised.
+
+For this purpose, a "partial instrumentation" support on par with llvm sancov
+is provided by afl++ that allows to specify on a source file and function
+level which function should be compiled with or without instrumentation.
+
+Note: When using PCGUARD mode - and llvm 12+ - you can use this instead:
+https://clang.llvm.org/docs/SanitizerCoverage.html#partially-disabling-instrumentation
+
+The llvm sancov list format is fully supported by afl++, however afl++ has
+more flexibility.
+
+## 2a) Building the LLVM module
+
+The new code is part of the existing afl++ LLVM module in the instrumentation/
+subdirectory. There is nothing specifically to do for the build :)
+
+## 2b) Building the GCC module
+
+The new code is part of the existing afl++ GCC_PLUGIN module in the
+instrumentation/ subdirectory. There is nothing specifically to do for
+the build :)
+
+## 3) How to use the partial instrumentation mode
+
+In order to build with partial instrumentation, you need to build with
+afl-clang-fast/afl-clang-fast++ or afl-clang-lto/afl-clang-lto++.
+The only required change is that you need to set either the environment variable
+AFL_LLVM_ALLOWLIST or AFL_LLVM_DENYLIST set with a filename.
+
+That file should contain the file names or functions that are to be instrumented
+(AFL_LLVM_ALLOWLIST) or are specifically NOT to be instrumented (AFL_LLVM_DENYLIST).
+
+GCC_PLUGIN: you can use either AFL_LLVM_ALLOWLIST or AFL_GCC_ALLOWLIST (or the
+same for _DENYLIST), both work.
+
+For matching to succeed, the function/file name that is being compiled must end in the
+function/file name entry contained in this instrument file list. That is to avoid
+breaking the match when absolute paths are used during compilation.
+
+**NOTE:** In builds with optimization enabled, functions might be inlined and would not match!
+
+For example if your source tree looks like this:
+```
+project/
+project/feature_a/a1.cpp
+project/feature_a/a2.cpp
+project/feature_b/b1.cpp
+project/feature_b/b2.cpp
+```
+
+and you only want to test feature_a, then create an "instrument file list" file containing:
+```
+feature_a/a1.cpp
+feature_a/a2.cpp
+```
+
+However if the "instrument file list" file contains only this, it works as well:
+```
+a1.cpp
+a2.cpp
+```
+but it might lead to files being unwantedly instrumented if the same filename
+exists somewhere else in the project directories.
+
+You can also specify function names. Note that for C++ the function names
+must be mangled to match! `nm` can print these names.
+
+afl++ is able to identify whether an entry is a filename or a function.
+However if you want to be sure (and compliant to the sancov allow/blocklist
+format), you can specify source file entries like this:
+```
+src: *malloc.c
+```
+and function entries like this:
+```
+fun: MallocFoo
+```
+Note that whitespace is ignored and comments (`# foo`) are supported.
+
+## 4) UNIX-style pattern matching
+
+You can add UNIX-style pattern matching in the "instrument file list" entries.
+See `man fnmatch` for the syntax. We do not set any of the `fnmatch` flags.
diff --git a/instrumentation/README.laf-intel.md b/instrumentation/README.laf-intel.md
new file mode 100644
index 00000000..c50a6979
--- /dev/null
+++ b/instrumentation/README.laf-intel.md
@@ -0,0 +1,56 @@
+# laf-intel instrumentation
+
+## Introduction
+
+This originally is the work of an individual nicknamed laf-intel.
+His blog [Circumventing Fuzzing Roadblocks with Compiler Transformations]
+(https://lafintel.wordpress.com/) and gitlab repo [laf-llvm-pass]
+(https://gitlab.com/laf-intel/laf-llvm-pass/)
+describe some code transformations that
+help afl++ to enter conditional blocks, where conditions consist of
+comparisons of large values.
+
+## Usage
+
+By default these passes will not run when you compile programs using
+afl-clang-fast. Hence, you can use AFL as usual.
+To enable the passes you must set environment variables before you
+compile the target project.
+
+The following options exist:
+
+`export AFL_LLVM_LAF_SPLIT_SWITCHES=1`
+
+Enables the split-switches pass.
+
+`export AFL_LLVM_LAF_TRANSFORM_COMPARES=1`
+
+Enables the transform-compares pass (strcmp, memcmp, strncmp,
+strcasecmp, strncasecmp).
+
+`export AFL_LLVM_LAF_SPLIT_COMPARES=1`
+
+Enables the split-compares pass.
+By default it will
+1. simplify operators >= (and <=) into chains of > (<) and == comparisons
+2. change signed integer comparisons to a chain of sign-only comparison
+and unsigned integer comparisons
+3. split all unsigned integer comparisons with bit widths of
+64, 32 or 16 bits to chains of 8 bits comparisons.
+
+You can change the behaviour of the last step by setting
+`export AFL_LLVM_LAF_SPLIT_COMPARES_BITW=<bit_width>`, where
+bit_width may be 64, 32 or 16. For example, a bit_width of 16
+would split larger comparisons down to 16 bit comparisons.
+
+A new experimental feature is splitting floating point comparisons into a
+series of sign, exponent and mantissa comparisons followed by splitting each
+of them into 8 bit comparisons when necessary.
+It is activated with the `AFL_LLVM_LAF_SPLIT_FLOATS` setting.
+Please note that full IEEE 754 functionality is not preserved, that is
+values of nan and infinity will probably behave differently.
+
+Note that setting this automatically activates `AFL_LLVM_LAF_SPLIT_COMPARES`
+
+You can also set `AFL_LLVM_LAF_ALL` and have all of the above enabled :-)
+
diff --git a/instrumentation/README.llvm.md b/instrumentation/README.llvm.md
new file mode 100644
index 00000000..07636970
--- /dev/null
+++ b/instrumentation/README.llvm.md
@@ -0,0 +1,194 @@
+# Fast LLVM-based instrumentation for afl-fuzz
+
+ (See [../README.md](../README.md) for the general instruction manual.)
+
+ (See [README.gcc_plugin.md](README.gcc_plugin.md) for the GCC-based instrumentation.)
+
+## 1) Introduction
+
+! llvm_mode works with llvm versions 3.4 up to 12 !
+
+The code in this directory allows you to instrument programs for AFL using
+true compiler-level instrumentation, instead of the more crude
+assembly-level rewriting approach taken by afl-gcc and afl-clang. This has
+several interesting properties:
+
+ - The compiler can make many optimizations that are hard to pull off when
+ manually inserting assembly. As a result, some slow, CPU-bound programs will
+ run up to around 2x faster.
+
+ The gains are less pronounced for fast binaries, where the speed is limited
+ chiefly by the cost of creating new processes. In such cases, the gain will
+ probably stay within 10%.
+
+ - The instrumentation is CPU-independent. At least in principle, you should
+ be able to rely on it to fuzz programs on non-x86 architectures (after
+ building afl-fuzz with AFL_NO_X86=1).
+
+ - The instrumentation can cope a bit better with multi-threaded targets.
+
+ - Because the feature relies on the internals of LLVM, it is clang-specific
+ and will *not* work with GCC (see ../gcc_plugin/ for an alternative once
+ it is available).
+
+Once this implementation is shown to be sufficiently robust and portable, it
+will probably replace afl-clang. For now, it can be built separately and
+co-exists with the original code.
+
+The idea and much of the initial implementation came from Laszlo Szekeres.
+
+## 2a) How to use this - short
+
+Set the `LLVM_CONFIG` variable to the clang version you want to use, e.g.
+```
+LLVM_CONFIG=llvm-config-9 make
+```
+In case you have your own compiled llvm version specify the full path:
+```
+LLVM_CONFIG=~/llvm-project/build/bin/llvm-config make
+```
+If you try to use a new llvm version on an old Linux this can fail because of
+old c++ libraries. In this case usually switching to gcc/g++ to compile
+llvm_mode will work:
+```
+LLVM_CONFIG=llvm-config-7 REAL_CC=gcc REAL_CXX=g++ make
+```
+It is highly recommended to use the newest clang version you can put your
+hands on :)
+
+Then look at [README.persistent_mode.md](README.persistent_mode.md).
+
+## 2b) How to use this - long
+
+In order to leverage this mechanism, you need to have clang installed on your
+system. You should also make sure that the llvm-config tool is in your path
+(or pointed to via LLVM_CONFIG in the environment).
+
+Note that if you have several LLVM versions installed, pointing LLVM_CONFIG
+to the version you want to use will switch compiling to this specific
+version - if your installation is set up correctly :-)
+
+Unfortunately, some systems that do have clang come without llvm-config or the
+LLVM development headers; one example of this is FreeBSD. FreeBSD users will
+also run into problems with clang being built statically and not being able to
+load modules (you'll see "Service unavailable" when loading afl-llvm-pass.so).
+
+To solve all your problems, you can grab pre-built binaries for your OS from:
+
+ http://llvm.org/releases/download.html
+
+...and then put the bin/ directory from the tarball at the beginning of your
+$PATH when compiling the feature and building packages later on. You don't need
+to be root for that.
+
+To build the instrumentation itself, type 'make'. This will generate binaries
+called afl-clang-fast and afl-clang-fast++ in the parent directory. Once this
+is done, you can instrument third-party code in a way similar to the standard
+operating mode of AFL, e.g.:
+
+```
+ CC=/path/to/afl/afl-clang-fast ./configure [...options...]
+ make
+```
+
+Be sure to also include CXX set to afl-clang-fast++ for C++ code.
+
+Note that afl-clang-fast/afl-clang-fast++ are just pointers to afl-cc.
+You can also use afl-cc/afl-c++ and instead direct it to use LLVM
+instrumentation by either setting `AFL_CC_COMPILER=LLVM` or pass the parameter
+`--afl-llvm` via CFLAGS/CXXFLAGS/CPPFLAGS.
+
+The tool honors roughly the same environmental variables as afl-gcc (see
+[docs/env_variables.md](../docs/env_variables.md)). This includes AFL_USE_ASAN,
+AFL_HARDEN, and AFL_DONT_OPTIMIZE. However AFL_INST_RATIO is not honored
+as it does not serve a good purpose with the more effective PCGUARD, LTO and
+ instrim CFG analysis.
+
+## 3) Options
+
+Several options are present to make llvm_mode faster or help it rearrange
+the code to make afl-fuzz path discovery easier.
+
+If you just need to instrument specific parts of the code, you can use an
+instrument file list to specify which C/C++ files to actually instrument.
+See [README.instrument_list.md](README.instrument_list.md)
+
+For splitting memcmp, strncmp, etc. please see [README.laf-intel.md](README.laf-intel.md)
+
+Then there are different ways of instrumenting the target:
+
+1. There is an optimized instrumentation strategy that uses CFGs and
+markers to just instrument what is needed. This increases speed by 10-15%
+without any disadvantages
+If you want to use this, set AFL_LLVM_INSTRUMENT=CFG or AFL_LLVM_INSTRIM=1
+See [README.instrim.md](README.instrim.md)
+
+2. An even better instrumentation strategy uses LTO and link time
+instrumentation. Note that not all targets can compile in this mode, however
+if it works it is the best option you can use.
+Simply use afl-clang-lto/afl-clang-lto++ to use this option.
+See [README.lto.md](README.lto.md)
+
+3. Alternatively you can choose a completely different coverage method:
+
+3a. N-GRAM coverage - which combines the previous visited edges with the
+current one. This explodes the map but on the other hand has proven to be
+effective for fuzzing.
+See [README.ngram.md](README.ngram.md)
+
+3b. Context sensitive coverage - which combines the visited edges with an
+individual caller ID (the function that called the current one)
+[README.ctx.md](README.ctx.md)
+
+Then - additionally to one of the instrumentation options above - there is
+a very effective new instrumentation option called CmpLog as an alternative to
+laf-intel that allow AFL++ to apply mutations similar to Redqueen.
+See [README.cmplog.md](README.cmplog.md)
+
+Finally if your llvm version is 8 or lower, you can activate a mode that
+prevents a counter overflow from resulting in a 0 value. This is good for
+path discovery, but the llvm implementation for x86 for this functionality
+is not optimal and was only fixed in llvm 9.
+You can set this with AFL_LLVM_NOT_ZERO=1
+See [README.neverzero.md](README.neverzero.md)
+
+## 4) Snapshot feature
+
+To speed up fuzzing you can use a linux loadable kernel module which enables
+a snapshot feature.
+See [README.snapshot.md](README.snapshot.md)
+
+## 5) Gotchas, feedback, bugs
+
+This is an early-stage mechanism, so field reports are welcome. You can send bug
+reports to <afl-users@googlegroups.com>.
+
+## 6) deferred initialization, persistent mode, shared memory fuzzing
+
+This is the most powerful and effective fuzzing you can do.
+Please see [README.persistent_mode.md](README.persistent_mode.md) for a
+full explanation.
+
+## 7) Bonus feature: 'trace-pc-guard' mode
+
+LLVM is shipping with a built-in execution tracing feature
+that provides AFL with the necessary tracing data without the need to
+post-process the assembly or install any compiler plugins. See:
+
+ http://clang.llvm.org/docs/SanitizerCoverage.html#tracing-pcs-with-guards
+
+If you do not have an outdated compiler and want to give it a try, build
+targets this way:
+
+```
+AFL_LLVM_INSTRUMENT=PCGUARD make
+```
+
+Note that this is currently the default if you use LLVM >= 7, as it is the best
+mode. Recommended is LLVM >= 9.
+If you have llvm 11+ and compiled afl-clang-lto - this is the only better mode.
+
+## 8) Bonus feature: 'dict2file' pass
+
+Just specify `AFL_LLVM_DICT2FILE=/absolute/path/file.txt` and during compilation
+all constant string compare parameters will be written to this file to be
+used with afl-fuzz' `-x` option.
diff --git a/instrumentation/README.lto.md b/instrumentation/README.lto.md
new file mode 100644
index 00000000..abdbd2ac
--- /dev/null
+++ b/instrumentation/README.lto.md
@@ -0,0 +1,290 @@
+# afl-clang-lto - collision free instrumentation at link time
+
+## TLDR;
+
+This version requires a current llvm 11+ compiled from the github master.
+
+1. Use afl-clang-lto/afl-clang-lto++ because it is faster and gives better
+ coverage than anything else that is out there in the AFL world
+
+2. You can use it together with llvm_mode: laf-intel and the instrument file listing
+ features and can be combined with cmplog/Redqueen
+
+3. It only works with llvm 11+
+
+4. AUTODICTIONARY feature! see below
+
+5. If any problems arise be sure to set `AR=llvm-ar RANLIB=llvm-ranlib`.
+ Some targets might need `LD=afl-clang-lto` and others `LD=afl-ld-lto`.
+
+## Introduction and problem description
+
+A big issue with how afl/afl++ works is that the basic block IDs that are
+set during compilation are random - and hence naturally the larger the number
+of instrumented locations, the higher the number of edge collisions are in the
+map. This can result in not discovering new paths and therefore degrade the
+efficiency of the fuzzing process.
+
+*This issue is underestimated in the fuzzing community!*
+With a 2^16 = 64kb standard map at already 256 instrumented blocks there is
+on average one collision. On average a target has 10.000 to 50.000
+instrumented blocks hence the real collisions are between 750-18.000!
+
+To reach a solution that prevents any collisions took several approaches
+and many dead ends until we got to this:
+
+ * We instrument at link time when we have all files pre-compiled
+ * To instrument at link time we compile in LTO (link time optimization) mode
+ * Our compiler (afl-clang-lto/afl-clang-lto++) takes care of setting the
+ correct LTO options and runs our own afl-ld linker instead of the system
+ linker
+ * The LLVM linker collects all LTO files to link and instruments them so that
+   we have non-colliding edge coverage
+ * We use a new (for afl) edge coverage - which is the same as in llvm
+ -fsanitize=coverage edge coverage mode :)
+
+The result:
+ * 10-25% speed gain compared to llvm_mode
+ * guaranteed non-colliding edge coverage :-)
+ * The compile time especially for binaries to an instrumented library can be
+ much longer
+
+Example build output from a libtiff build:
+```
+libtool: link: afl-clang-lto -g -O2 -Wall -W -o thumbnail thumbnail.o ../libtiff/.libs/libtiff.a ../port/.libs/libport.a -llzma -ljbig -ljpeg -lz -lm
+afl-clang-lto++2.63d by Marc "vanHauser" Heuse <mh@mh-sec.de> in mode LTO
+afl-llvm-lto++2.63d by Marc "vanHauser" Heuse <mh@mh-sec.de>
+AUTODICTIONARY: 11 strings found
+[+] Instrumented 12071 locations with no collisions (on average 1046 collisions would be in afl-gcc/afl-clang-fast) (non-hardened mode).
+```
+
+## Getting llvm 11+
+
+### Installing llvm from the llvm repository (version 11)
+
+Installing the llvm snapshot builds is easy and mostly painless:
+
+In the following line change `NAME` to your Debian or Ubuntu release name
+(e.g. buster, focal, eoan, etc.):
+```
+echo deb http://apt.llvm.org/NAME/ llvm-toolchain-NAME NAME >> /etc/apt/sources.list
+```
+then add the pgp key of llvm and install the packages:
+```
+wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add -
+apt-get update && apt-get upgrade -y
+apt-get install -y clang-11 clang-tools-11 libc++1-11 libc++-11-dev \
+ libc++abi1-11 libc++abi-11-dev libclang1-11 libclang-11-dev \
+ libclang-common-11-dev libclang-cpp11 libclang-cpp11-dev liblld-11 \
+ liblld-11-dev liblldb-11 liblldb-11-dev libllvm11 libomp-11-dev \
+ libomp5-11 lld-11 lldb-11 llvm-11 llvm-11-dev llvm-11-runtime llvm-11-tools
+```
+
+### Building llvm yourself (version 12)
+
+Building llvm from github takes quite some long time and is not painless:
+```
+sudo apt install binutils-dev # this is *essential*!
+git clone https://github.com/llvm/llvm-project
+cd llvm-project
+mkdir build
+cd build
+cmake -DLLVM_ENABLE_PROJECTS='clang;clang-tools-extra;compiler-rt;libclc;libcxx;libcxxabi;libunwind;lld' -DCMAKE_BUILD_TYPE=Release -DLLVM_BINUTILS_INCDIR=/usr/include/ ../llvm/
+make -j $(nproc)
+export PATH=`pwd`/bin:$PATH
+export LLVM_CONFIG=`pwd`/bin/llvm-config
+cd /path/to/AFLplusplus/
+make
+sudo make install
+```
+
+## How to use afl-clang-lto
+
+Just use afl-clang-lto like you did with afl-clang-fast or afl-gcc.
+
+Also the instrument file listing (AFL_LLVM_ALLOWLIST/AFL_LLVM_DENYLIST -> [README.instrument_list.md](README.instrument_list.md)) and
+laf-intel/compcov (AFL_LLVM_LAF_* -> [README.laf-intel.md](README.laf-intel.md)) work.
+
+Example:
+```
+CC=afl-clang-lto CXX=afl-clang-lto++ RANLIB=llvm-ranlib AR=llvm-ar ./configure
+make
+```
+
+NOTE: some targets also need to set the linker, try both `afl-clang-lto` and
+`afl-ld-lto` for `LD=` before `configure`.
+
+## AUTODICTIONARY feature
+
+While compiling, a dictionary based on string comparisons is automatically
+generated and put into the target binary. This dictionary is transferred to afl-fuzz
+on start. This improves coverage statistically by 5-10% :)
+
+## Fixed memory map
+
+To speed up fuzzing, it is possible to set a fixed shared memory map.
+Recommended is the value 0x10000.
+In most cases this will work without any problems. However if a target uses
+early constructors, ifuncs or a deferred forkserver this can crash the target.
+On unusual operating systems/processors/kernels or weird libraries this might
+fail so to change the fixed address at compile time set
+AFL_LLVM_MAP_ADDR with a better value (a value of 0 or empty sets the map address
+to be dynamic - the original afl way, which is slower).
+
+## Document edge IDs
+
+Setting `export AFL_LLVM_DOCUMENT_IDS=file` will document in a file which edge
+ID was given to which function. This helps to identify functions with variable
+bytes or which functions were touched by an input.
+
+## Solving difficult targets
+
+Some targets are difficult because the configure script does unusual stuff that
+is unexpected for afl. See the next chapter `Potential issues` for how to solve
+these.
+
+### Example: ffmpeg
+
+An example of a hard to solve target is ffmpeg. Here is how to successfully
+instrument it:
+
+1. Get and extract the current ffmpeg and change to its directory
+
+2. Running configure with --cc=clang fails and various other items will fail
+ when compiling, so we have to trick configure:
+
+```
+./configure --enable-lto --disable-shared --disable-inline-asm
+```
+
+3. Now the configuration is done - and we edit the settings in `./ffbuild/config.mak`
+ (-: the original line, +: what to change it into):
+```
+-CC=gcc
++CC=afl-clang-lto
+-CXX=g++
++CXX=afl-clang-lto++
+-AS=gcc
++AS=llvm-as
+-LD=gcc
++LD=afl-clang-lto++
+-DEPCC=gcc
++DEPCC=afl-clang-lto
+-DEPAS=gcc
++DEPAS=afl-clang-lto++
+-AR=ar
++AR=llvm-ar
+-AR_CMD=ar
++AR_CMD=llvm-ar
+-NM_CMD=nm -g
++NM_CMD=llvm-nm -g
+-RANLIB=ranlib -D
++RANLIB=llvm-ranlib -D
+```
+
+4. Then type make, wait for a long time and you are done :)
+
+### Example: WebKit jsc
+
+Building jsc is difficult as the build script has bugs.
+
+1. checkout Webkit:
+```
+svn checkout https://svn.webkit.org/repository/webkit/trunk WebKit
+cd WebKit
+```
+
+2. Fix the build environment:
+```
+mkdir -p WebKitBuild/Release
+cd WebKitBuild/Release
+ln -s ../../../../../usr/bin/llvm-ar-12 llvm-ar-12
+ln -s ../../../../../usr/bin/llvm-ranlib-12 llvm-ranlib-12
+cd ../..
+```
+
+3. Build :)
+
+```
+Tools/Scripts/build-jsc --jsc-only --cli --cmakeargs="-DCMAKE_AR='llvm-ar-12' -DCMAKE_RANLIB='llvm-ranlib-12' -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DCMAKE_CC_FLAGS='-O3 -lrt' -DCMAKE_CXX_FLAGS='-O3 -lrt' -DIMPORTED_LOCATION='/lib/x86_64-linux-gnu/' -DCMAKE_CC=afl-clang-lto -DCMAKE_CXX=afl-clang-lto++ -DENABLE_STATIC_JSC=ON"
+```
+
+## Potential issues
+
+### compiling libraries fails
+
+If you see this message:
+```
+/bin/ld: libfoo.a: error adding symbols: archive has no index; run ranlib to add one
+```
+This is because usually gnu gcc ranlib is being called which cannot deal with clang LTO files.
+The solution is simple: when you ./configure you also have to set RANLIB=llvm-ranlib and AR=llvm-ar
+
+Solution:
+```
+AR=llvm-ar RANLIB=llvm-ranlib CC=afl-clang-lto CXX=afl-clang-lto++ ./configure --disable-shared
+```
+and on some targets you have to set AR=/RANLIB= even for make as the configure script does not save it.
+Other targets ignore environment variables and need the parameters set via
+`./configure --cc=... --cxx= --ranlib= ...` etc. (I am looking at you ffmpeg!).
+
+
+If you see this message
+```
+assembler command failed ...
+```
+then try setting `llvm-as` for configure:
+```
+AS=llvm-as ...
+```
+
+### compiling programs still fail
+
+afl-clang-lto is still work in progress.
+
+Known issues:
+ * Anything that llvm 11+ cannot compile, afl-clang-lto cannot compile either - obviously
+ * Anything that does not compile with LTO, afl-clang-lto cannot compile either - obviously
+
+Hence if building a target with afl-clang-lto fails try to build it with llvm12
+and LTO enabled (`CC=clang-12` `CXX=clang++-12` `CFLAGS=-flto=full` and
+`CXXFLAGS=-flto=full`).
+
+If this succeeds then there is an issue with afl-clang-lto. Please report at
+[https://github.com/AFLplusplus/AFLplusplus/issues/226](https://github.com/AFLplusplus/AFLplusplus/issues/226)
+
+Even some targets where clang-12 fails can be build if the fail is just in
+`./configure`, see `Solving difficult targets` above.
+
+## History
+
+This was originally envisioned by hexcoder- in Summer 2019, however we saw no
+way to create a pass that is run at link time - although there is an option
+for this in the PassManager: EP_FullLinkTimeOptimizationLast
+("Fun" info - nobody knows what this is doing. And the developer who
+implemented this didn't respond to emails.)
+
+In December then came the idea to implement this as a pass that is run via
+the llvm "opt" program, which is performed via an own linker that afterwards
+calls the real linker.
+This was first implemented in January and worked ... kinda.
+The LTO time instrumentation worked, however "how" the basic blocks were
+instrumented was a problem, as reducing duplicates turned out to be very,
+very difficult with a program that has so many paths and therefore so many
+dependencies. A lot of strategies were implemented - and failed.
+And then sat solvers were tried, but with over 10.000 variables that turned
+out to be a dead-end too.
+
+The final idea to solve this came from domenukk who proposed to insert a block
+into an edge and then just use incremental counters ... and this worked!
+After some trials and errors to implement this vanhauser-thc found out that
+there is actually an llvm function for this: SplitEdge() :-)
+
+Still more problems came up though as this only works without bugs from
+llvm 9 onwards, and with high optimization the link optimization ruins
+the instrumented control flow graph.
+
+This is all now fixed with llvm 11+. The llvm's own linker is now able to
+load passes and this bypasses all problems we had.
+
+Happy end :)
diff --git a/instrumentation/README.neverzero.md b/instrumentation/README.neverzero.md
new file mode 100644
index 00000000..5c894d6e
--- /dev/null
+++ b/instrumentation/README.neverzero.md
@@ -0,0 +1,35 @@
+# NeverZero counters for LLVM instrumentation
+
+## Usage
+
+In larger, complex or reiterative programs the byte sized counters that collect
+the edge coverage can easily fill up and wrap around.
+This is not that much of an issue - unless by chance it wraps just to a value
+of zero when the program execution ends.
+In this case afl-fuzz is not able to see that the edge has been accessed and
+will ignore it.
+
+NeverZero prevents this behaviour. If a counter wraps, it jumps over the value
+0 directly to a 1. This improves path discovery (by a very little amount)
+at a very little cost (one instruction per edge).
+
+(The alternative of saturated counters has been tested also and proved to be
+inferior in terms of path discovery.)
+
+This is implemented in afl-gcc, however for llvm_mode this is optional if
+the llvm version is below 9 - as there is a performance bug that is only fixed
+in version 9 and onwards.
+
+If you want to enable this for llvm versions below 9 then set
+
+```
+export AFL_LLVM_NOT_ZERO=1
+```
+
+In case you are on llvm 9 or greater and you do not want this behaviour then
+you can set:
+```
+AFL_LLVM_SKIP_NEVERZERO=1
+```
+If the target does not have extensive loops or functions that are called
+a lot then this can give a small performance boost.
diff --git a/instrumentation/README.ngram.md b/instrumentation/README.ngram.md
new file mode 100644
index 00000000..de3ba432
--- /dev/null
+++ b/instrumentation/README.ngram.md
@@ -0,0 +1,28 @@
+# AFL N-Gram Branch Coverage
+
+## Source
+
+This is an LLVM-based implementation of the n-gram branch coverage proposed in
+the paper ["Be Sensitive and Collaborative: Analyzing Impact of Coverage Metrics
+in Greybox Fuzzing"](https://www.usenix.org/system/files/raid2019-wang-jinghan.pdf),
+by Jinghan Wang, et. al.
+
+Note that the original implementation (available
+[here](https://github.com/bitsecurerlab/afl-sensitive))
+is built on top of AFL's QEMU mode.
+This is essentially a port that uses LLVM vectorized instructions to achieve
+the same results when compiling source code.
+
+In math the branch coverage is performed as follows:
+`map[current_location ^ prev_location[0] >> 1 ^ prev_location[1] >> 1 ^ ... up to n-1] += 1`
+
+## Usage
+
+The size of `n` (i.e., the number of branches to remember) is an option
+that is specified either in the `AFL_LLVM_INSTRUMENT=NGRAM-{value}` or the
+`AFL_LLVM_NGRAM_SIZE` environment variable.
+Good values are 2, 4 or 8, valid are 2-16.
+
+It is highly recommended to increase the MAP_SIZE_POW2 definition in
+config.h to at least 18 and maybe up to 20 for this as otherwise too
+many map collisions occur.
diff --git a/instrumentation/README.out_of_line.md b/instrumentation/README.out_of_line.md
new file mode 100644
index 00000000..aad215b6
--- /dev/null
+++ b/instrumentation/README.out_of_line.md
@@ -0,0 +1,21 @@
+===========================================
+Using afl++ without inlined instrumentation
+===========================================
+
+ This file describes how you can disable inlining of instrumentation.
+
+
+By default, the GCC plugin will duplicate the effects of calling
+__afl_trace (see afl-gcc-rt.o.c) in instrumented code, instead of
+issuing function calls.
+
+The calls are presumed to be slower, more so because the rt file
+itself is not optimized by the compiler.
+
+Setting AFL_GCC_OUT_OF_LINE=1 in the environment while compiling code
+with the plugin will disable this inlining, issuing calls to the
+unoptimized runtime instead.
+
+You probably don't want to do this, but it might be useful in certain
+AFL debugging scenarios, and it might work as a fallback in case
+something goes wrong with the inlined instrumentation.
diff --git a/instrumentation/README.persistent_mode.md b/instrumentation/README.persistent_mode.md
new file mode 100644
index 00000000..e095f036
--- /dev/null
+++ b/instrumentation/README.persistent_mode.md
@@ -0,0 +1,209 @@
+# llvm_mode persistent mode
+
+## 1) Introduction
+
+The most effective way is to fuzz in persistent mode, as the speed can easily
+be x10 or x20 times faster without any disadvantages.
+*All professional fuzzing is using this mode.*
+
+This requires that the target can be called in a (or several) function(s),
+and that its state can be reset so that multiple calls can be performed
+without resource leaks and former runs having no impact on following runs
+(this can be seen by the `stability` indicator in the `afl-fuzz` UI).
+
+Examples can be found in [examples/persistent_mode](../examples/persistent_mode).
+
+## 2) TLDR;
+
+Example `fuzz_target.c`:
+```
+#include "what_you_need_for_your_target.h"
+
+__AFL_FUZZ_INIT();
+
+int main() {
+
+#ifdef __AFL_HAVE_MANUAL_CONTROL
+ __AFL_INIT();
+#endif
+
+ unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF; // must be after __AFL_INIT
+
+ while (__AFL_LOOP(10000)) {
+
+ int len = __AFL_FUZZ_TESTCASE_LEN;
+ if (len < 8) continue; // check for a required/useful minimum input length
+
+ /* Setup function call, e.g. struct target *tmp = libtarget_init() */
+ /* Call function to be fuzzed, e.g.: */
+ target_function(buf, len);
+ /* Reset state. e.g. libtarget_free(tmp) */
+
+ }
+
+ return 0;
+
+}
+```
+And then compile:
+```
+afl-clang-fast -o fuzz_target fuzz_target.c -lwhat_you_need_for_your_target
+```
+And that is it!
+The speed increase is usually x10 to x20.
+
+If you want to be able to compile the target without afl-clang-fast/lto then
+add this just after the includes:
+
+```
+#ifndef __AFL_FUZZ_TESTCASE_LEN
+ ssize_t fuzz_len;
+ #define __AFL_FUZZ_TESTCASE_LEN fuzz_len
+ unsigned char fuzz_buf[1024000];
+ #define __AFL_FUZZ_TESTCASE_BUF fuzz_buf
+ #define __AFL_FUZZ_INIT() void sync(void);
+ #define __AFL_LOOP(x) ((fuzz_len = read(0, fuzz_buf, sizeof(fuzz_buf))) > 0 ? 1 : 0)
+ #define __AFL_INIT() sync()
+#endif
+```
+
+## 3) Deferred initialization
+
+AFL tries to optimize performance by executing the targeted binary just once,
+stopping it just before main(), and then cloning this "main" process to get
+a steady supply of targets to fuzz.
+
+Although this approach eliminates much of the OS-, linker- and libc-level
+costs of executing the program, it does not always help with binaries that
+perform other time-consuming initialization steps - say, parsing a large config
+file before getting to the fuzzed data.
+
+In such cases, it's beneficial to initialize the forkserver a bit later, once
+most of the initialization work is already done, but before the binary attempts
+to read the fuzzed input and parse it; in some cases, this can offer a 10x+
+performance gain. You can implement delayed initialization in LLVM mode in a
+fairly simple way.
+
+First, find a suitable location in the code where the delayed cloning can
+take place. This needs to be done with *extreme* care to avoid breaking the
+binary. In particular, the program will probably malfunction if you select
+a location after:
+
+ - The creation of any vital threads or child processes - since the forkserver
+ can't clone them easily.
+
+ - The initialization of timers via setitimer() or equivalent calls.
+
+ - The creation of temporary files, network sockets, offset-sensitive file
+ descriptors, and similar shared-state resources - but only provided that
+ their state meaningfully influences the behavior of the program later on.
+
+ - Any access to the fuzzed input, including reading the metadata about its
+ size.
+
+With the location selected, add this code in the appropriate spot:
+
+```c
+#ifdef __AFL_HAVE_MANUAL_CONTROL
+ __AFL_INIT();
+#endif
+```
+
+You don't need the #ifdef guards, but including them ensures that the program
+will keep working normally when compiled with a tool other than afl-clang-fast.
+
+Finally, recompile the program with afl-clang-fast/lto (afl-gcc or afl-clang will
+*not* generate a deferred-initialization binary) - and you should be all set!
+
+*NOTE:* The code between `main` and `__AFL_INIT()` should not contain any
+instrumented code - otherwise a crash might occur.
+In case this is useful (e.g. for expensive one time initialization) you can
+try to do the following:
+
+Add after the includes:
+```
+extern unsigned char *__afl_area_ptr;
+#define MAX_DUMMY_SIZE 256000
+
+__attribute__((constructor(1))) void __afl_protect(void) {
+#ifdef MAP_FIXED_NOREPLACE
+ __afl_area_ptr = (unsigned char*) mmap((void *)0x10000, MAX_DUMMY_SIZE, PROT_READ | PROT_WRITE, MAP_FIXED_NOREPLACE | MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ if ((uint64_t)__afl_area_ptr == -1)
+#endif
+ __afl_area_ptr = (unsigned char*) mmap((void *)0x10000, MAX_DUMMY_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ if ((uint64_t)__afl_area_ptr == -1)
+ __afl_area_ptr = (unsigned char*) mmap(NULL, MAX_DUMMY_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+}
+
+```
+and just before `__AFL_INIT()`:
+```
+ munmap(__afl_area_ptr, MAX_DUMMY_SIZE);
+ __afl_area_ptr = NULL;
+```
+
+## 4) Persistent mode
+
+Some libraries provide APIs that are stateless, or whose state can be reset in
+between processing different input files. When such a reset is performed, a
+single long-lived process can be reused to try out multiple test cases,
+eliminating the need for repeated fork() calls and the associated OS overhead.
+
+The basic structure of the program that does this would be:
+
+```c
+ while (__AFL_LOOP(1000)) {
+
+ /* Read input data. */
+ /* Call library code to be fuzzed. */
+ /* Reset state. */
+
+ }
+
+ /* Exit normally */
+```
+
+The numerical value specified within the loop controls the maximum number
+of iterations before AFL will restart the process from scratch. This minimizes
+the impact of memory leaks and similar glitches; 1000 is a good starting point,
+and going much higher increases the likelihood of hiccups without giving you
+any real performance benefits.
+
+A more detailed template is shown in ../examples/persistent_demo/.
+Similarly to the previous mode, the feature works only with afl-clang-fast; #ifdef
+guards can be used to suppress it when using other compilers.
+
+Note that as with the previous mode, the feature is easy to misuse; if you
+do not fully reset the critical state, you may end up with false positives or
+waste a whole lot of CPU power doing nothing useful at all. Be particularly
+wary of memory leaks and of the state of file descriptors.
+
+PS. Because there are task switches still involved, the mode isn't as fast as
+"pure" in-process fuzzing offered, say, by LLVM's LibFuzzer; but it is a lot
+faster than the normal fork() model, and compared to in-process fuzzing,
+should be a lot more robust.
+
+## 5) Shared memory fuzzing
+
+You can speed up the fuzzing process even more by receiving the fuzzing data
+via shared memory instead of stdin or files.
+This is a further speed multiplier of about 2x.
+
+Setting this up is very easy:
+
+After the includes set the following macro:
+
+```
+__AFL_FUZZ_INIT();
+```
+Directly at the start of main - or if you are using the deferred forkserver
+with `__AFL_INIT()` then *after* `__AFL_INIT()`:
+```
+ unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF;
+```
+
+Then as first line after the `__AFL_LOOP` while loop:
+```
+ int len = __AFL_FUZZ_TESTCASE_LEN;
+```
+and that is all!
diff --git a/instrumentation/README.snapshot.md b/instrumentation/README.snapshot.md
new file mode 100644
index 00000000..c40a956a
--- /dev/null
+++ b/instrumentation/README.snapshot.md
@@ -0,0 +1,16 @@
+# AFL++ snapshot feature
+
+Snapshotting is a feature that makes a snapshot from a process and then
+restores its state, which is faster than forking it again.
+
+All targets compiled with llvm_mode are automatically enabled for the
+snapshot feature.
+
+To use the snapshot feature for fuzzing compile and load this kernel
+module: [https://github.com/AFLplusplus/AFL-Snapshot-LKM](https://github.com/AFLplusplus/AFL-Snapshot-LKM)
+
+Note that it has little value for persistent (__AFL_LOOP) fuzzing.
+
+## Notes
+
+Snapshot does not work with multithreaded targets yet. It is still a work in progress and currently usable only for single-threaded applications.
diff --git a/instrumentation/SanitizerCoverageLTO.so.cc b/instrumentation/SanitizerCoverageLTO.so.cc
new file mode 100644
index 00000000..82e55218
--- /dev/null
+++ b/instrumentation/SanitizerCoverageLTO.so.cc
@@ -0,0 +1,1613 @@
+/* SanitizeCoverage.cpp ported to afl++ LTO :-) */
+
+#define AFL_LLVM_PASS
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/time.h>
+
+#include <list>
+#include <string>
+#include <fstream>
+#include <set>
+#include <iostream>
+
+#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/SpecialCaseList.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+#include "config.h"
+#include "debug.h"
+#include "afl-llvm-common.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "sancov"
+
+static const char *const SanCovTracePCIndirName =
+ "__sanitizer_cov_trace_pc_indir";
+static const char *const SanCovTracePCName = "__sanitizer_cov_trace_pc";
+// static const char *const SanCovTracePCGuardName =
+// "__sanitizer_cov_trace_pc_guard";
+static const char *const SanCovGuardsSectionName = "sancov_guards";
+static const char *const SanCovCountersSectionName = "sancov_cntrs";
+static const char *const SanCovBoolFlagSectionName = "sancov_bools";
+static const char *const SanCovPCsSectionName = "sancov_pcs";
+
+static cl::opt<int> ClCoverageLevel(
+ "lto-coverage-level",
+ cl::desc("Sanitizer Coverage. 0: none, 1: entry block, 2: all blocks, "
+ "3: all blocks and critical edges"),
+ cl::Hidden, cl::init(3));
+
+static cl::opt<bool> ClTracePC("lto-coverage-trace-pc",
+ cl::desc("Experimental pc tracing"), cl::Hidden,
+ cl::init(false));
+
+static cl::opt<bool> ClTracePCGuard("lto-coverage-trace-pc-guard",
+ cl::desc("pc tracing with a guard"),
+ cl::Hidden, cl::init(false));
+
+// If true, we create a global variable that contains PCs of all instrumented
+// BBs, put this global into a named section, and pass this section's bounds
+// to __sanitizer_cov_pcs_init.
+// This way the coverage instrumentation does not need to acquire the PCs
+// at run-time. Works with trace-pc-guard, inline-8bit-counters, and
+// inline-bool-flag.
+static cl::opt<bool> ClCreatePCTable("lto-coverage-pc-table",
+ cl::desc("create a static PC table"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClInline8bitCounters(
+ "lto-coverage-inline-8bit-counters",
+ cl::desc("increments 8-bit counter for every edge"), cl::Hidden,
+ cl::init(false));
+
+static cl::opt<bool> ClInlineBoolFlag(
+ "lto-coverage-inline-bool-flag",
+ cl::desc("sets a boolean flag for every edge"), cl::Hidden,
+ cl::init(false));
+
+static cl::opt<bool> ClPruneBlocks(
+ "lto-coverage-prune-blocks",
+ cl::desc("Reduce the number of instrumented blocks"), cl::Hidden,
+ cl::init(true));
+
+namespace {
+
+// Translate the legacy integer coverage level (the -lto-coverage-level
+// command line flag) into a SanitizerCoverageOptions value:
+//   0 = none, 1 = function entry blocks, 2 = all blocks,
+//   3 = all blocks and critical edges, 4 = level 3 plus indirect calls.
+// Any other value leaves Res at its default-constructed state.
+SanitizerCoverageOptions getOptions(int LegacyCoverageLevel) {
+
+  SanitizerCoverageOptions Res;
+  switch (LegacyCoverageLevel) {
+
+    case 0:
+      Res.CoverageType = SanitizerCoverageOptions::SCK_None;
+      break;
+    case 1:
+      Res.CoverageType = SanitizerCoverageOptions::SCK_Function;
+      break;
+    case 2:
+      Res.CoverageType = SanitizerCoverageOptions::SCK_BB;
+      break;
+    case 3:
+      Res.CoverageType = SanitizerCoverageOptions::SCK_Edge;
+      break;
+    case 4:
+      Res.CoverageType = SanitizerCoverageOptions::SCK_Edge;
+      Res.IndirectCalls = true;
+      break;
+
+  }
+
+  return Res;
+
+}
+
+// Merge the options supplied by the pass constructor with the values of the
+// lto-coverage-* command line flags. The flags can only strengthen (never
+// weaken) the requested instrumentation; if no tracing mode ends up selected
+// at all, TracePCGuard is enabled as the default.
+SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) {
+
+  // Sets CoverageType and IndirectCalls.
+  SanitizerCoverageOptions CLOpts = getOptions(ClCoverageLevel);
+  Options.CoverageType = std::max(Options.CoverageType, CLOpts.CoverageType);
+  Options.IndirectCalls |= CLOpts.IndirectCalls;
+  Options.TracePC |= ClTracePC;
+  Options.TracePCGuard |= ClTracePCGuard;
+  Options.Inline8bitCounters |= ClInline8bitCounters;
+  Options.InlineBoolFlag |= ClInlineBoolFlag;
+  Options.PCTable |= ClCreatePCTable;
+  Options.NoPrune |= !ClPruneBlocks;
+  if (!Options.TracePCGuard && !Options.TracePC &&
+      !Options.Inline8bitCounters && !Options.InlineBoolFlag)
+    Options.TracePCGuard = true;  // TracePCGuard is default.
+  return Options;
+
+}
+
+using DomTreeCallback = function_ref<const DominatorTree *(Function &F)>;
+using PostDomTreeCallback =
+ function_ref<const PostDominatorTree *(Function &F)>;
+
+// The worker class of this pass: LLVM's SanitizerCoverage module
+// instrumentation ported to afl++ LTO mode. The upstream allow/deny-list
+// support is commented out; afl++-specific state lives between the
+// "afl++ START"/"afl++ END" markers below.
+class ModuleSanitizerCoverage {
+
+ public:
+  ModuleSanitizerCoverage(
+      const SanitizerCoverageOptions &Options = SanitizerCoverageOptions())
+      : Options(OverrideFromCL(Options)) {
+
+    /* ,
+            const SpecialCaseList * Allowlist = nullptr,
+            const SpecialCaseList * Blocklist = nullptr)
+            ,
+            Allowlist(Allowlist),
+            Blocklist(Blocklist) {
+
+    */
+
+  }
+
+  // Main entry point: instruments every function in M, returns true if the
+  // module was modified.
+  bool instrumentModule(Module &M, DomTreeCallback DTCallback,
+                        PostDomTreeCallback PDTCallback);
+
+ private:
+  void instrumentFunction(Function &F, DomTreeCallback DTCallback,
+                          PostDomTreeCallback PDTCallback);
+  void InjectCoverageForIndirectCalls(Function & F,
+                                      ArrayRef<Instruction *> IndirCalls);
+  bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks,
+                      bool IsLeafFunc = true);
+  GlobalVariable *CreateFunctionLocalArrayInSection(size_t NumElements,
+                                                    Function &F, Type *Ty,
+                                                    const char *Section);
+  GlobalVariable *CreatePCArray(Function &F, ArrayRef<BasicBlock *> AllBlocks);
+  void CreateFunctionLocalArrays(Function &F, ArrayRef<BasicBlock *> AllBlocks);
+  void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx,
+                             bool IsLeafFunc = true);
+  // std::pair<Value *, Value *> CreateSecStartEnd(Module &M, const char
+  // *Section,
+  //                                               Type *Ty);
+
+  // Tag an instruction "nosanitize" so other sanitizers skip it.
+  void SetNoSanitizeMetadata(Instruction *I) {
+
+    I->setMetadata(I->getModule()->getMDKindID("nosanitize"),
+                   MDNode::get(*C, None));
+
+  }
+
+  std::string getSectionName(const std::string &Section) const;
+  // std::string getSectionStart(const std::string &Section) const;
+  // std::string getSectionEnd(const std::string &Section) const;
+  FunctionCallee SanCovTracePCIndir;
+  FunctionCallee SanCovTracePC /*, SanCovTracePCGuard*/;
+  Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy,
+      *Int16Ty, *Int8Ty, *Int8PtrTy, *Int1Ty, *Int1PtrTy;
+  Module * CurModule;
+  std::string CurModuleUniqueId;
+  Triple TargetTriple;
+  LLVMContext * C;
+  const DataLayout *DL;
+
+  GlobalVariable *FunctionGuardArray; // for trace-pc-guard.
+  GlobalVariable *Function8bitCounterArray; // for inline-8bit-counters.
+  GlobalVariable *FunctionBoolArray; // for inline-bool-flag.
+  GlobalVariable *FunctionPCsArray; // for pc-table.
+  SmallVector<GlobalValue *, 20> GlobalsToAppendToUsed;
+  SmallVector<GlobalValue *, 20> GlobalsToAppendToCompilerUsed;
+
+  SanitizerCoverageOptions Options;
+
+  // afl++ START
+  // const SpecialCaseList * Allowlist;
+  // const SpecialCaseList * Blocklist;
+  uint32_t autodictionary = 1;  // collect cmp/str/mem constants as dictionary
+  uint32_t inst = 0;
+  uint32_t afl_global_id = 0;  // next edge ID; start set via AFL_LLVM_LTO_STARTID
+  uint64_t map_addr = 0;  // fixed shared-map address, 0 = dynamic (AFL_LLVM_MAP_ADDR)
+  char * skip_nozero = NULL;  // AFL_LLVM_SKIP_NEVERZERO
+  std::vector<BasicBlock *> BlockList;  // __afl_persistent_loop callers + successors
+  DenseMap<Value *, std::string *> valueMap;
+  std::vector<std::string> dictionary;  // auto-extracted dictionary entries
+  IntegerType * Int8Tyi = NULL;
+  IntegerType * Int32Tyi = NULL;
+  IntegerType * Int64Tyi = NULL;
+  ConstantInt * Zero = NULL;
+  ConstantInt * One = NULL;
+  LLVMContext * Ct = NULL;
+  Module * Mo = NULL;
+  GlobalVariable * AFLMapPtr = NULL;  // __afl_area_ptr, used when map_addr == 0
+  Value * MapPtrFixed = NULL;  // constant pointer, used when map_addr != 0
+  FILE * documentFile = NULL;  // AFL_LLVM_DOCUMENT_IDS output stream
+  size_t found = 0;  // number of dictionary entries collected
+  // afl++ END
+
+};
+
+// Legacy pass-manager wrapper: declares the (post)dominator tree analyses
+// this pass depends on and forwards runOnModule() to the
+// ModuleSanitizerCoverage worker class above.
+class ModuleSanitizerCoverageLegacyPass : public ModulePass {
+
+ public:
+  static char ID;
+  StringRef getPassName() const override {
+
+    return "sancov";
+
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<PostDominatorTreeWrapperPass>();
+
+  }
+
+  ModuleSanitizerCoverageLegacyPass(
+      const SanitizerCoverageOptions &Options = SanitizerCoverageOptions())
+      : ModulePass(ID), Options(Options) {
+
+    /* ,
+        const std::vector<std::string> &AllowlistFiles =
+            std::vector<std::string>(),
+        const std::vector<std::string> &BlocklistFiles =
+            std::vector<std::string>())
+        if (AllowlistFiles.size() > 0)
+          Allowlist = SpecialCaseList::createOrDie(AllowlistFiles,
+                                                   *vfs::getRealFileSystem());
+        if (BlocklistFiles.size() > 0)
+          Blocklist = SpecialCaseList::createOrDie(BlocklistFiles,
+                                                   *vfs::getRealFileSystem());
+    */
+    initializeModuleSanitizerCoverageLegacyPassPass(
+        *PassRegistry::getPassRegistry());
+
+  }
+
+  // Runs the instrumentation on M; returns true when M was modified.
+  bool runOnModule(Module &M) override {
+
+    ModuleSanitizerCoverage ModuleSancov(Options);
+    // , Allowlist.get(), Blocklist.get());
+    // Lazily fetch per-function dominator trees from the legacy analysis
+    // framework on demand.
+    auto DTCallback = [this](Function &F) -> const DominatorTree * {
+
+      return &this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
+
+    };
+
+    auto PDTCallback = [this](Function &F) -> const PostDominatorTree * {
+
+      return &this->getAnalysis<PostDominatorTreeWrapperPass>(F)
+                  .getPostDomTree();
+
+    };
+
+    return ModuleSancov.instrumentModule(M, DTCallback, PDTCallback);
+
+  }
+
+ private:
+  SanitizerCoverageOptions Options;
+
+  // std::unique_ptr<SpecialCaseList> Allowlist;
+  // std::unique_ptr<SpecialCaseList> Blocklist;
+
+};
+
+} // namespace
+
+// New pass-manager entry point: builds dominator/post-dominator lookup
+// callbacks from the module's FunctionAnalysisManager and runs the
+// instrumentation. Returns PreservedAnalyses::none() when the module was
+// modified, all() otherwise.
+PreservedAnalyses ModuleSanitizerCoveragePass::run(Module & M,
+                                                   ModuleAnalysisManager &MAM) {
+
+  ModuleSanitizerCoverage ModuleSancov(Options);
+  // Allowlist.get(), Blocklist.get());
+  auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+  auto DTCallback = [&FAM](Function &F) -> const DominatorTree * {
+
+    return &FAM.getResult<DominatorTreeAnalysis>(F);
+
+  };
+
+  auto PDTCallback = [&FAM](Function &F) -> const PostDominatorTree * {
+
+    return &FAM.getResult<PostDominatorTreeAnalysis>(F);
+
+  };
+
+  if (ModuleSancov.instrumentModule(M, DTCallback, PDTCallback))
+    return PreservedAnalyses::none();
+
+  return PreservedAnalyses::all();
+
+}
+
+/*
+std::pair<Value *, Value *> ModuleSanitizerCoverage::CreateSecStartEnd(
+ Module &M, const char *Section, Type *Ty) {
+
+ GlobalVariable *SecStart =
+ new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage, nullptr,
+ getSectionStart(Section));
+ SecStart->setVisibility(GlobalValue::HiddenVisibility);
+ GlobalVariable *SecEnd =
+ new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage, nullptr,
+ getSectionEnd(Section));
+ SecEnd->setVisibility(GlobalValue::HiddenVisibility);
+ IRBuilder<> IRB(M.getContext());
+ Value * SecEndPtr = IRB.CreatePointerCast(SecEnd, Ty);
+ if (!TargetTriple.isOSBinFormatCOFF())
+ return std::make_pair(IRB.CreatePointerCast(SecStart, Ty), SecEndPtr);
+
+ // Account for the fact that on windows-msvc __start_* symbols actually
+ // point to a uint64_t before the start of the array.
+ auto SecStartI8Ptr = IRB.CreatePointerCast(SecStart, Int8PtrTy);
+ auto GEP = IRB.CreateGEP(Int8Ty, SecStartI8Ptr,
+ ConstantInt::get(IntptrTy, sizeof(uint64_t)));
+ return std::make_pair(IRB.CreatePointerCast(GEP, Ty), SecEndPtr);
+
+}
+
+*/
+
+bool ModuleSanitizerCoverage::instrumentModule(
+ Module &M, DomTreeCallback DTCallback, PostDomTreeCallback PDTCallback) {
+
+ if (Options.CoverageType == SanitizerCoverageOptions::SCK_None) return false;
+ /*
+ if (Allowlist &&
+ !Allowlist->inSection("coverage", "src", M.getSourceFileName()))
+ return false;
+ if (Blocklist &&
+ Blocklist->inSection("coverage", "src", M.getSourceFileName()))
+ return false;
+ */
+ BlockList.clear();
+ valueMap.clear();
+ dictionary.clear();
+ C = &(M.getContext());
+ DL = &M.getDataLayout();
+ CurModule = &M;
+ CurModuleUniqueId = getUniqueModuleId(CurModule);
+ TargetTriple = Triple(M.getTargetTriple());
+ FunctionGuardArray = nullptr;
+ Function8bitCounterArray = nullptr;
+ FunctionBoolArray = nullptr;
+ FunctionPCsArray = nullptr;
+ IntptrTy = Type::getIntNTy(*C, DL->getPointerSizeInBits());
+ IntptrPtrTy = PointerType::getUnqual(IntptrTy);
+ Type * VoidTy = Type::getVoidTy(*C);
+ IRBuilder<> IRB(*C);
+ Int64PtrTy = PointerType::getUnqual(IRB.getInt64Ty());
+ Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
+ Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty());
+ Int1PtrTy = PointerType::getUnqual(IRB.getInt1Ty());
+ Int64Ty = IRB.getInt64Ty();
+ Int32Ty = IRB.getInt32Ty();
+ Int16Ty = IRB.getInt16Ty();
+ Int8Ty = IRB.getInt8Ty();
+ Int1Ty = IRB.getInt1Ty();
+
+ /* afl++ START */
+ char * ptr;
+ LLVMContext &Ctx = M.getContext();
+ Ct = &Ctx;
+ Int8Tyi = IntegerType::getInt8Ty(Ctx);
+ Int32Tyi = IntegerType::getInt32Ty(Ctx);
+ Int64Tyi = IntegerType::getInt64Ty(Ctx);
+
+ /* Show a banner */
+ setvbuf(stdout, NULL, _IONBF, 0);
+ if (getenv("AFL_DEBUG")) debug = 1;
+
+ if ((isatty(2) && !getenv("AFL_QUIET")) || debug) {
+
+ SAYF(cCYA "afl-llvm-lto" VERSION cRST
+ " by Marc \"vanHauser\" Heuse <mh@mh-sec.de>\n");
+
+ } else
+
+ be_quiet = 1;
+
+ skip_nozero = getenv("AFL_LLVM_SKIP_NEVERZERO");
+
+ if ((ptr = getenv("AFL_LLVM_LTO_STARTID")) != NULL)
+ if ((afl_global_id = atoi(ptr)) < 0)
+ FATAL("AFL_LLVM_LTO_STARTID value of \"%s\" is negative\n", ptr);
+
+ if ((ptr = getenv("AFL_LLVM_DOCUMENT_IDS")) != NULL) {
+
+ if ((documentFile = fopen(ptr, "a")) == NULL)
+ WARNF("Cannot access document file %s", ptr);
+
+ }
+
+ // we make this the default as the fixed map has problems with
+ // defered forkserver, early constructors, ifuncs and maybe more
+ /*if (getenv("AFL_LLVM_MAP_DYNAMIC"))*/
+ map_addr = 0;
+
+ if ((ptr = getenv("AFL_LLVM_MAP_ADDR"))) {
+
+ uint64_t val;
+ if (!*ptr || !strcmp(ptr, "0") || !strcmp(ptr, "0x0")) {
+
+ map_addr = 0;
+
+ } else if (getenv("AFL_LLVM_MAP_DYNAMIC")) {
+
+ FATAL(
+ "AFL_LLVM_MAP_ADDR and AFL_LLVM_MAP_DYNAMIC cannot be used together");
+
+ } else if (strncmp(ptr, "0x", 2) != 0) {
+
+ map_addr = 0x10000; // the default
+
+ } else {
+
+ val = strtoull(ptr, NULL, 16);
+ if (val < 0x100 || val > 0xffffffff00000000) {
+
+ FATAL(
+ "AFL_LLVM_MAP_ADDR must be a value between 0x100 and "
+ "0xffffffff00000000");
+
+ }
+
+ map_addr = val;
+
+ }
+
+ }
+
+ /* Get/set the globals for the SHM region. */
+
+ if (!map_addr) {
+
+ AFLMapPtr =
+ new GlobalVariable(M, PointerType::get(Int8Tyi, 0), false,
+ GlobalValue::ExternalLinkage, 0, "__afl_area_ptr");
+
+ } else {
+
+ ConstantInt *MapAddr = ConstantInt::get(Int64Tyi, map_addr);
+ MapPtrFixed =
+ ConstantExpr::getIntToPtr(MapAddr, PointerType::getUnqual(Int8Tyi));
+
+ }
+
+ Zero = ConstantInt::get(Int8Tyi, 0);
+ One = ConstantInt::get(Int8Tyi, 1);
+
+ scanForDangerousFunctions(&M);
+ Mo = &M;
+
+ if (autodictionary) {
+
+ for (auto &F : M) {
+
+ for (auto &BB : F) {
+
+ for (auto &IN : BB) {
+
+ CallInst *callInst = nullptr;
+ CmpInst * cmpInst = nullptr;
+
+ if ((cmpInst = dyn_cast<CmpInst>(&IN))) {
+
+ Value * op = cmpInst->getOperand(1);
+ ConstantInt *ilen = dyn_cast<ConstantInt>(op);
+
+ if (ilen && ilen->uge(0xffffffffffffffff) == false) {
+
+ u64 val2 = 0, val = ilen->getZExtValue();
+ u32 len = 0;
+ if (val > 0x10000 && val < 0xffffffff) len = 4;
+ if (val > 0x100000001 && val < 0xffffffffffffffff) len = 8;
+
+ if (len) {
+
+ auto c = cmpInst->getPredicate();
+
+ switch (c) {
+
+ case CmpInst::FCMP_OGT: // fall through
+ case CmpInst::FCMP_OLE: // fall through
+ case CmpInst::ICMP_SLE: // fall through
+ case CmpInst::ICMP_SGT:
+
+ // signed comparison and it is a negative constant
+ if ((len == 4 && (val & 80000000)) ||
+ (len == 8 && (val & 8000000000000000))) {
+
+ if ((val & 0xffff) != 1) val2 = val - 1;
+ break;
+
+ }
+
+ // fall through
+
+ case CmpInst::FCMP_UGT: // fall through
+ case CmpInst::FCMP_ULE: // fall through
+ case CmpInst::ICMP_UGT: // fall through
+ case CmpInst::ICMP_ULE:
+ if ((val & 0xffff) != 0xfffe) val2 = val + 1;
+ break;
+
+ case CmpInst::FCMP_OLT: // fall through
+ case CmpInst::FCMP_OGE: // fall through
+ case CmpInst::ICMP_SLT: // fall through
+ case CmpInst::ICMP_SGE:
+
+ // signed comparison and it is a negative constant
+ if ((len == 4 && (val & 80000000)) ||
+ (len == 8 && (val & 8000000000000000))) {
+
+ if ((val & 0xffff) != 1) val2 = val - 1;
+ break;
+
+ }
+
+ // fall through
+
+ case CmpInst::FCMP_ULT: // fall through
+ case CmpInst::FCMP_UGE: // fall through
+ case CmpInst::ICMP_ULT: // fall through
+ case CmpInst::ICMP_UGE:
+ if ((val & 0xffff) != 1) val2 = val - 1;
+ break;
+
+ default:
+ val2 = 0;
+
+ }
+
+ dictionary.push_back(std::string((char *)&val, len));
+ found++;
+
+ if (val2) {
+
+ dictionary.push_back(std::string((char *)&val2, len));
+ found++;
+
+ }
+
+ }
+
+ }
+
+ }
+
+ if ((callInst = dyn_cast<CallInst>(&IN))) {
+
+ bool isStrcmp = true;
+ bool isMemcmp = true;
+ bool isStrncmp = true;
+ bool isStrcasecmp = true;
+ bool isStrncasecmp = true;
+ bool isIntMemcpy = true;
+ bool isStdString = true;
+ bool addedNull = false;
+ size_t optLen = 0;
+
+ Function *Callee = callInst->getCalledFunction();
+ if (!Callee) continue;
+ if (callInst->getCallingConv() != llvm::CallingConv::C) continue;
+ std::string FuncName = Callee->getName().str();
+ isStrcmp &= !FuncName.compare("strcmp");
+ isMemcmp &=
+ (!FuncName.compare("memcmp") || !FuncName.compare("bcmp"));
+ isStrncmp &= !FuncName.compare("strncmp");
+ isStrcasecmp &= !FuncName.compare("strcasecmp");
+ isStrncasecmp &= !FuncName.compare("strncasecmp");
+ isIntMemcpy &= !FuncName.compare("llvm.memcpy.p0i8.p0i8.i64");
+ isStdString &=
+ ((FuncName.find("basic_string") != std::string::npos &&
+ FuncName.find("compare") != std::string::npos) ||
+ (FuncName.find("basic_string") != std::string::npos &&
+ FuncName.find("find") != std::string::npos));
+
+ /* we do something different here, putting this BB and the
+ successors in a block map */
+ if (!FuncName.compare("__afl_persistent_loop")) {
+
+ BlockList.push_back(&BB);
+ for (succ_iterator SI = succ_begin(&BB), SE = succ_end(&BB);
+ SI != SE; ++SI) {
+
+ BasicBlock *succ = *SI;
+ BlockList.push_back(succ);
+
+ }
+
+ }
+
+ if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp &&
+ !isStrncasecmp && !isIntMemcpy && !isStdString)
+ continue;
+
+ /* Verify the strcmp/memcmp/strncmp/strcasecmp/strncasecmp function
+ * prototype */
+ FunctionType *FT = Callee->getFunctionType();
+
+ isStrcmp &= FT->getNumParams() == 2 &&
+ FT->getReturnType()->isIntegerTy(32) &&
+ FT->getParamType(0) == FT->getParamType(1) &&
+ FT->getParamType(0) ==
+ IntegerType::getInt8PtrTy(M.getContext());
+ isStrcasecmp &= FT->getNumParams() == 2 &&
+ FT->getReturnType()->isIntegerTy(32) &&
+ FT->getParamType(0) == FT->getParamType(1) &&
+ FT->getParamType(0) ==
+ IntegerType::getInt8PtrTy(M.getContext());
+ isMemcmp &= FT->getNumParams() == 3 &&
+ FT->getReturnType()->isIntegerTy(32) &&
+ FT->getParamType(0)->isPointerTy() &&
+ FT->getParamType(1)->isPointerTy() &&
+ FT->getParamType(2)->isIntegerTy();
+ isStrncmp &= FT->getNumParams() == 3 &&
+ FT->getReturnType()->isIntegerTy(32) &&
+ FT->getParamType(0) == FT->getParamType(1) &&
+ FT->getParamType(0) ==
+ IntegerType::getInt8PtrTy(M.getContext()) &&
+ FT->getParamType(2)->isIntegerTy();
+ isStrncasecmp &= FT->getNumParams() == 3 &&
+ FT->getReturnType()->isIntegerTy(32) &&
+ FT->getParamType(0) == FT->getParamType(1) &&
+ FT->getParamType(0) ==
+ IntegerType::getInt8PtrTy(M.getContext()) &&
+ FT->getParamType(2)->isIntegerTy();
+ isStdString &= FT->getNumParams() >= 2 &&
+ FT->getParamType(0)->isPointerTy() &&
+ FT->getParamType(1)->isPointerTy();
+
+ if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp &&
+ !isStrncasecmp && !isIntMemcpy && !isStdString)
+ continue;
+
+ /* is a str{n,}{case,}cmp/memcmp, check if we have
+ * str{case,}cmp(x, "const") or str{case,}cmp("const", x)
+ * strn{case,}cmp(x, "const", ..) or strn{case,}cmp("const", x, ..)
+ * memcmp(x, "const", ..) or memcmp("const", x, ..) */
+ Value *Str1P = callInst->getArgOperand(0),
+ *Str2P = callInst->getArgOperand(1);
+ std::string Str1, Str2;
+ StringRef TmpStr;
+ bool HasStr1 = getConstantStringInfo(Str1P, TmpStr);
+ if (TmpStr.empty())
+ HasStr1 = false;
+ else
+ Str1 = TmpStr.str();
+ bool HasStr2 = getConstantStringInfo(Str2P, TmpStr);
+ if (TmpStr.empty())
+ HasStr2 = false;
+ else
+ Str2 = TmpStr.str();
+
+ if (debug)
+ fprintf(stderr, "F:%s %p(%s)->\"%s\"(%s) %p(%s)->\"%s\"(%s)\n",
+ FuncName.c_str(), Str1P, Str1P->getName().str().c_str(),
+ Str1.c_str(), HasStr1 == true ? "true" : "false", Str2P,
+ Str2P->getName().str().c_str(), Str2.c_str(),
+ HasStr2 == true ? "true" : "false");
+
+ // we handle the 2nd parameter first because of llvm memcpy
+ if (!HasStr2) {
+
+ auto *Ptr = dyn_cast<ConstantExpr>(Str2P);
+ if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) {
+
+ if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) {
+
+ if (Var->hasInitializer()) {
+
+ if (auto *Array = dyn_cast<ConstantDataArray>(
+ Var->getInitializer())) {
+
+ HasStr2 = true;
+ Str2 = Array->getAsString().str();
+
+ }
+
+ }
+
+ }
+
+ }
+
+ }
+
+ // for the internal memcpy routine we only care for the second
+ // parameter and are not reporting anything.
+ if (isIntMemcpy == true) {
+
+ if (HasStr2 == true) {
+
+ Value * op2 = callInst->getArgOperand(2);
+ ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
+ if (ilen) {
+
+ uint64_t literalLength = Str2.size();
+ uint64_t optLength = ilen->getZExtValue();
+ if (literalLength + 1 == optLength) {
+
+ Str2.append("\0", 1); // add null byte
+ addedNull = true;
+
+ }
+
+ }
+
+ valueMap[Str1P] = new std::string(Str2);
+
+ if (debug)
+ fprintf(stderr, "Saved: %s for %p\n", Str2.c_str(), Str1P);
+ continue;
+
+ }
+
+ continue;
+
+ }
+
+ // Neither a literal nor a global variable?
+ // maybe it is a local variable that we saved
+ if (!HasStr2) {
+
+ std::string *strng = valueMap[Str2P];
+ if (strng && !strng->empty()) {
+
+ Str2 = *strng;
+ HasStr2 = true;
+ if (debug)
+ fprintf(stderr, "Filled2: %s for %p\n", strng->c_str(),
+ Str2P);
+
+ }
+
+ }
+
+ if (!HasStr1) {
+
+ auto Ptr = dyn_cast<ConstantExpr>(Str1P);
+
+ if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) {
+
+ if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) {
+
+ if (Var->hasInitializer()) {
+
+ if (auto *Array = dyn_cast<ConstantDataArray>(
+ Var->getInitializer())) {
+
+ HasStr1 = true;
+ Str1 = Array->getAsString().str();
+
+ }
+
+ }
+
+ }
+
+ }
+
+ }
+
+ // Neither a literal nor a global variable?
+ // maybe it is a local variable that we saved
+ if (!HasStr1) {
+
+ std::string *strng = valueMap[Str1P];
+ if (strng && !strng->empty()) {
+
+ Str1 = *strng;
+ HasStr1 = true;
+ if (debug)
+ fprintf(stderr, "Filled1: %s for %p\n", strng->c_str(),
+ Str1P);
+
+ }
+
+ }
+
+ /* handle cases of one string is const, one string is variable */
+ if (!(HasStr1 ^ HasStr2)) continue;
+
+ std::string thestring;
+
+ if (HasStr1)
+ thestring = Str1;
+ else
+ thestring = Str2;
+
+ optLen = thestring.length();
+
+ if (isMemcmp || isStrncmp || isStrncasecmp) {
+
+ Value * op2 = callInst->getArgOperand(2);
+ ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
+ if (ilen) {
+
+ uint64_t literalLength = optLen;
+ optLen = ilen->getZExtValue();
+ if (literalLength + 1 == optLen) { // add null byte
+ thestring.append("\0", 1);
+ addedNull = true;
+
+ }
+
+ }
+
+ }
+
+ // add null byte if this is a string compare function and a null
+ // was not already added
+ if (!isMemcmp) {
+
+ if (addedNull == false) {
+
+ thestring.append("\0", 1); // add null byte
+ optLen++;
+
+ }
+
+ // ensure we do not have garbage
+ size_t offset = thestring.find('\0', 0);
+ if (offset + 1 < optLen) optLen = offset + 1;
+ thestring = thestring.substr(0, optLen);
+
+ }
+
+ if (!be_quiet) {
+
+ std::string outstring;
+ fprintf(stderr, "%s: length %zu/%zu \"", FuncName.c_str(), optLen,
+ thestring.length());
+ for (uint8_t i = 0; i < thestring.length(); i++) {
+
+ uint8_t c = thestring[i];
+ if (c <= 32 || c >= 127)
+ fprintf(stderr, "\\x%02x", c);
+ else
+ fprintf(stderr, "%c", c);
+
+ }
+
+ fprintf(stderr, "\"\n");
+
+ }
+
+ // we take the longer string, even if the compare was to a
+ // shorter part. Note that depending on the optimizer of the
+ // compiler this can be wrong, but it is more likely that this
+ // is helping the fuzzer
+ if (optLen != thestring.length()) optLen = thestring.length();
+ if (optLen > MAX_AUTO_EXTRA) optLen = MAX_AUTO_EXTRA;
+ if (optLen < MIN_AUTO_EXTRA) // too short? skip
+ continue;
+
+ dictionary.push_back(thestring.substr(0, optLen));
+
+ }
+
+ }
+
+ }
+
+ }
+
+ }
+
+ // afl++ END
+
+ SanCovTracePCIndir =
+ M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy);
+ // Make sure smaller parameters are zero-extended to i64 as required by the
+ // x86_64 ABI.
+ AttributeList SanCovTraceCmpZeroExtAL;
+ if (TargetTriple.getArch() == Triple::x86_64) {
+
+ SanCovTraceCmpZeroExtAL =
+ SanCovTraceCmpZeroExtAL.addParamAttribute(*C, 0, Attribute::ZExt);
+ SanCovTraceCmpZeroExtAL =
+ SanCovTraceCmpZeroExtAL.addParamAttribute(*C, 1, Attribute::ZExt);
+
+ }
+
+ SanCovTracePC = M.getOrInsertFunction(SanCovTracePCName, VoidTy);
+
+ // SanCovTracePCGuard =
+ // M.getOrInsertFunction(SanCovTracePCGuardName, VoidTy, Int32PtrTy);
+
+ for (auto &F : M)
+ instrumentFunction(F, DTCallback, PDTCallback);
+
+ // afl++ START
+ if (documentFile) {
+
+ fclose(documentFile);
+ documentFile = NULL;
+
+ }
+
+ if (!getenv("AFL_LLVM_LTO_DONTWRITEID") || dictionary.size() || map_addr) {
+
+ // yes we could create our own function, insert it into ctors ...
+ // but this would be a pain in the butt ... so we use afl-llvm-rt-lto.o
+
+ Function *f = M.getFunction("__afl_auto_init_globals");
+
+ if (!f) {
+
+ fprintf(stderr,
+ "Error: init function could not be found (this should not "
+ "happen)\n");
+ exit(-1);
+
+ }
+
+ BasicBlock *bb = &f->getEntryBlock();
+ if (!bb) {
+
+ fprintf(stderr,
+ "Error: init function does not have an EntryBlock (this should "
+ "not happen)\n");
+ exit(-1);
+
+ }
+
+ BasicBlock::iterator IP = bb->getFirstInsertionPt();
+ IRBuilder<> IRB(&(*IP));
+
+ if (map_addr) {
+
+ GlobalVariable *AFLMapAddrFixed = new GlobalVariable(
+ M, Int64Tyi, true, GlobalValue::ExternalLinkage, 0, "__afl_map_addr");
+ ConstantInt *MapAddr = ConstantInt::get(Int64Tyi, map_addr);
+ StoreInst * StoreMapAddr = IRB.CreateStore(MapAddr, AFLMapAddrFixed);
+ StoreMapAddr->setMetadata(M.getMDKindID("nosanitize"),
+ MDNode::get(Ctx, None));
+
+ }
+
+ if (getenv("AFL_LLVM_LTO_DONTWRITEID") == NULL) {
+
+ uint32_t write_loc = afl_global_id;
+
+ if (afl_global_id % 8) write_loc = (((afl_global_id + 8) >> 3) << 3);
+
+ GlobalVariable *AFLFinalLoc =
+ new GlobalVariable(M, Int32Tyi, true, GlobalValue::ExternalLinkage, 0,
+ "__afl_final_loc");
+ ConstantInt *const_loc = ConstantInt::get(Int32Tyi, write_loc);
+ StoreInst * StoreFinalLoc = IRB.CreateStore(const_loc, AFLFinalLoc);
+ StoreFinalLoc->setMetadata(M.getMDKindID("nosanitize"),
+ MDNode::get(Ctx, None));
+
+ }
+
+ if (dictionary.size()) {
+
+ size_t memlen = 0, count = 0, offset = 0;
+ char * ptr;
+
+ // sort and unique the dictionary
+ std::sort(dictionary.begin(), dictionary.end());
+ auto last = std::unique(dictionary.begin(), dictionary.end());
+ dictionary.erase(last, dictionary.end());
+
+ for (auto token : dictionary) {
+
+ memlen += token.length();
+ count++;
+
+ }
+
+ if (!be_quiet)
+ printf("AUTODICTIONARY: %lu string%s found\n", count,
+ count == 1 ? "" : "s");
+
+ if (count) {
+
+ if ((ptr = (char *)malloc(memlen + count)) == NULL) {
+
+ fprintf(stderr, "Error: malloc for %lu bytes failed!\n",
+ memlen + count);
+ exit(-1);
+
+ }
+
+ count = 0;
+
+ for (auto token : dictionary) {
+
+ if (offset + token.length() < 0xfffff0 && count < MAX_AUTO_EXTRAS) {
+
+ ptr[offset++] = (uint8_t)token.length();
+ memcpy(ptr + offset, token.c_str(), token.length());
+ offset += token.length();
+ count++;
+
+ }
+
+ }
+
+ GlobalVariable *AFLDictionaryLen =
+ new GlobalVariable(M, Int32Tyi, false, GlobalValue::ExternalLinkage,
+ 0, "__afl_dictionary_len");
+ ConstantInt *const_len = ConstantInt::get(Int32Tyi, offset);
+ StoreInst *StoreDictLen = IRB.CreateStore(const_len, AFLDictionaryLen);
+ StoreDictLen->setMetadata(M.getMDKindID("nosanitize"),
+ MDNode::get(Ctx, None));
+
+ ArrayType *ArrayTy = ArrayType::get(IntegerType::get(Ctx, 8), offset);
+ GlobalVariable *AFLInternalDictionary = new GlobalVariable(
+ M, ArrayTy, true, GlobalValue::ExternalLinkage,
+ ConstantDataArray::get(Ctx,
+ *(new ArrayRef<char>((char *)ptr, offset))),
+ "__afl_internal_dictionary");
+ AFLInternalDictionary->setInitializer(ConstantDataArray::get(
+ Ctx, *(new ArrayRef<char>((char *)ptr, offset))));
+ AFLInternalDictionary->setConstant(true);
+
+ GlobalVariable *AFLDictionary = new GlobalVariable(
+ M, PointerType::get(Int8Tyi, 0), false,
+ GlobalValue::ExternalLinkage, 0, "__afl_dictionary");
+
+ Value *AFLDictOff = IRB.CreateGEP(AFLInternalDictionary, Zero);
+ Value *AFLDictPtr =
+ IRB.CreatePointerCast(AFLDictOff, PointerType::get(Int8Tyi, 0));
+ StoreInst *StoreDict = IRB.CreateStore(AFLDictPtr, AFLDictionary);
+ StoreDict->setMetadata(M.getMDKindID("nosanitize"),
+ MDNode::get(Ctx, None));
+
+ }
+
+ }
+
+ }
+
+ /* Say something nice. */
+
+ if (!be_quiet) {
+
+ if (!inst)
+ WARNF("No instrumentation targets found.");
+ else {
+
+ char modeline[100];
+ snprintf(modeline, sizeof(modeline), "%s%s%s%s%s",
+ getenv("AFL_HARDEN") ? "hardened" : "non-hardened",
+ getenv("AFL_USE_ASAN") ? ", ASAN" : "",
+ getenv("AFL_USE_MSAN") ? ", MSAN" : "",
+ getenv("AFL_USE_CFISAN") ? ", CFISAN" : "",
+ getenv("AFL_USE_UBSAN") ? ", UBSAN" : "");
+ OKF("Instrumented %u locations with no collisions (on average %llu "
+ "collisions would be in afl-gcc/afl-clang-fast) (%s mode).",
+ inst, calculateCollisions(inst), modeline);
+
+ }
+
+ }
+
+ // afl++ END
+
+ // We don't reference these arrays directly in any of our runtime functions,
+ // so we need to prevent them from being dead stripped.
+ if (TargetTriple.isOSBinFormatMachO()) appendToUsed(M, GlobalsToAppendToUsed);
+ appendToCompilerUsed(M, GlobalsToAppendToCompilerUsed);
+ return true;
+
+}
+
+// True if block has successors and it dominates all of them.
+static bool isFullDominator(const BasicBlock *BB, const DominatorTree *DT) {
+
+ if (succ_begin(BB) == succ_end(BB)) return false;
+
+ for (const BasicBlock *SUCC : make_range(succ_begin(BB), succ_end(BB))) {
+
+ if (!DT->dominates(BB, SUCC)) return false;
+
+ }
+
+ return true;
+
+}
+
+// True if block has predecessors and it postdominates all of them.
+static bool isFullPostDominator(const BasicBlock * BB,
+ const PostDominatorTree *PDT) {
+
+ if (pred_begin(BB) == pred_end(BB)) return false;
+
+ for (const BasicBlock *PRED : make_range(pred_begin(BB), pred_end(BB))) {
+
+ if (!PDT->dominates(BB, PRED)) return false;
+
+ }
+
+ return true;
+
+}
+
+static bool shouldInstrumentBlock(const Function &F, const BasicBlock *BB,
+ const DominatorTree * DT,
+ const PostDominatorTree * PDT,
+ const SanitizerCoverageOptions &Options) {
+
+ // Don't insert coverage for blocks containing nothing but unreachable: we
+ // will never call __sanitizer_cov() for them, so counting them in
+ // NumberOfInstrumentedBlocks() might complicate calculation of code coverage
+ // percentage. Also, unreachable instructions frequently have no debug
+ // locations.
+ if (isa<UnreachableInst>(BB->getFirstNonPHIOrDbgOrLifetime())) return false;
+
+ // Don't insert coverage into blocks without a valid insertion point
+ // (catchswitch blocks).
+ if (BB->getFirstInsertionPt() == BB->end()) return false;
+
+ // afl++ START
+ if (!Options.NoPrune && &F.getEntryBlock() == BB && F.size() > 1)
+ return false;
+ // afl++ END
+
+ if (Options.NoPrune || &F.getEntryBlock() == BB) return true;
+
+ if (Options.CoverageType == SanitizerCoverageOptions::SCK_Function &&
+ &F.getEntryBlock() != BB)
+ return false;
+
+ // Do not instrument full dominators, or full post-dominators with multiple
+ // predecessors.
+ return !isFullDominator(BB, DT) &&
+ !(isFullPostDominator(BB, PDT) && !BB->getSinglePredecessor());
+
+}
+
+void ModuleSanitizerCoverage::instrumentFunction(
+ Function &F, DomTreeCallback DTCallback, PostDomTreeCallback PDTCallback) {
+
+ if (F.empty()) return;
+ if (F.getName().find(".module_ctor") != std::string::npos)
+ return; // Should not instrument sanitizer init functions.
+ if (F.getName().startswith("__sanitizer_"))
+ return; // Don't instrument __sanitizer_* callbacks.
+ // Don't touch available_externally functions, their actual body is elewhere.
+ if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return;
+ // Don't instrument MSVC CRT configuration helpers. They may run before normal
+ // initialization.
+ if (F.getName() == "__local_stdio_printf_options" ||
+ F.getName() == "__local_stdio_scanf_options")
+ return;
+ if (isa<UnreachableInst>(F.getEntryBlock().getTerminator())) return;
+ // Don't instrument functions using SEH for now. Splitting basic blocks like
+ // we do for coverage breaks WinEHPrepare.
+ // FIXME: Remove this when SEH no longer uses landingpad pattern matching.
+ if (F.hasPersonalityFn() &&
+ isAsynchronousEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
+ return;
+ // if (Allowlist && !Allowlist->inSection("coverage", "fun", F.getName()))
+ // return;
+ // if (Blocklist && Blocklist->inSection("coverage", "fun", F.getName()))
+ // return;
+
+ // afl++ START
+ if (!F.size()) return;
+ if (isIgnoreFunction(&F)) return;
+ // afl++ END
+
+ if (Options.CoverageType >= SanitizerCoverageOptions::SCK_Edge)
+ SplitAllCriticalEdges(
+ F, CriticalEdgeSplittingOptions().setIgnoreUnreachableDests());
+ SmallVector<Instruction *, 8> IndirCalls;
+ SmallVector<BasicBlock *, 16> BlocksToInstrument;
+
+ const DominatorTree * DT = DTCallback(F);
+ const PostDominatorTree *PDT = PDTCallback(F);
+ bool IsLeafFunc = true;
+
+ for (auto &BB : F) {
+
+ if (shouldInstrumentBlock(F, &BB, DT, PDT, Options))
+ BlocksToInstrument.push_back(&BB);
+ for (auto &Inst : BB) {
+
+ if (Options.IndirectCalls) {
+
+ CallBase *CB = dyn_cast<CallBase>(&Inst);
+ if (CB && !CB->getCalledFunction()) IndirCalls.push_back(&Inst);
+
+ }
+
+ }
+
+ }
+
+ InjectCoverage(F, BlocksToInstrument, IsLeafFunc);
+ InjectCoverageForIndirectCalls(F, IndirCalls);
+
+}
+
+GlobalVariable *ModuleSanitizerCoverage::CreateFunctionLocalArrayInSection(
+ size_t NumElements, Function &F, Type *Ty, const char *Section) {
+
+ ArrayType *ArrayTy = ArrayType::get(Ty, NumElements);
+ auto Array = new GlobalVariable(
+ *CurModule, ArrayTy, false, GlobalVariable::PrivateLinkage,
+ Constant::getNullValue(ArrayTy), "__sancov_gen_");
+
+ if (TargetTriple.supportsCOMDAT() && !F.isInterposable())
+ if (auto Comdat =
+ GetOrCreateFunctionComdat(F, TargetTriple, CurModuleUniqueId))
+ Array->setComdat(Comdat);
+ Array->setSection(getSectionName(Section));
+ Array->setAlignment(Align(DL->getTypeStoreSize(Ty).getFixedSize()));
+ GlobalsToAppendToUsed.push_back(Array);
+ GlobalsToAppendToCompilerUsed.push_back(Array);
+ MDNode *MD = MDNode::get(F.getContext(), ValueAsMetadata::get(&F));
+ Array->addMetadata(LLVMContext::MD_associated, *MD);
+
+ return Array;
+
+}
+
+GlobalVariable *ModuleSanitizerCoverage::CreatePCArray(
+ Function &F, ArrayRef<BasicBlock *> AllBlocks) {
+
+ size_t N = AllBlocks.size();
+ assert(N);
+ SmallVector<Constant *, 32> PCs;
+ IRBuilder<> IRB(&*F.getEntryBlock().getFirstInsertionPt());
+ for (size_t i = 0; i < N; i++) {
+
+ if (&F.getEntryBlock() == AllBlocks[i]) {
+
+ PCs.push_back((Constant *)IRB.CreatePointerCast(&F, IntptrPtrTy));
+ PCs.push_back((Constant *)IRB.CreateIntToPtr(
+ ConstantInt::get(IntptrTy, 1), IntptrPtrTy));
+
+ } else {
+
+ PCs.push_back((Constant *)IRB.CreatePointerCast(
+ BlockAddress::get(AllBlocks[i]), IntptrPtrTy));
+ PCs.push_back((Constant *)IRB.CreateIntToPtr(
+ ConstantInt::get(IntptrTy, 0), IntptrPtrTy));
+
+ }
+
+ }
+
+ auto *PCArray = CreateFunctionLocalArrayInSection(N * 2, F, IntptrPtrTy,
+ SanCovPCsSectionName);
+ PCArray->setInitializer(
+ ConstantArray::get(ArrayType::get(IntptrPtrTy, N * 2), PCs));
+ PCArray->setConstant(true);
+
+ return PCArray;
+
+}
+
+void ModuleSanitizerCoverage::CreateFunctionLocalArrays(
+ Function &F, ArrayRef<BasicBlock *> AllBlocks) {
+
+ if (Options.TracePCGuard)
+ FunctionGuardArray = CreateFunctionLocalArrayInSection(
+ AllBlocks.size(), F, Int32Ty, SanCovGuardsSectionName);
+ if (Options.Inline8bitCounters)
+ Function8bitCounterArray = CreateFunctionLocalArrayInSection(
+ AllBlocks.size(), F, Int8Ty, SanCovCountersSectionName);
+ if (Options.InlineBoolFlag)
+ FunctionBoolArray = CreateFunctionLocalArrayInSection(
+ AllBlocks.size(), F, Int1Ty, SanCovBoolFlagSectionName);
+ if (Options.PCTable) FunctionPCsArray = CreatePCArray(F, AllBlocks);
+
+}
+
+bool ModuleSanitizerCoverage::InjectCoverage(Function & F,
+ ArrayRef<BasicBlock *> AllBlocks,
+ bool IsLeafFunc) {
+
+ if (AllBlocks.empty()) return false;
+ CreateFunctionLocalArrays(F, AllBlocks);
+ for (size_t i = 0, N = AllBlocks.size(); i < N; i++) {
+
+ // afl++ START
+ if (BlockList.size()) {
+
+ int skip = 0;
+ for (uint32_t k = 0; k < BlockList.size(); k++) {
+
+ if (AllBlocks[i] == BlockList[k]) {
+
+ if (debug)
+ fprintf(stderr,
+ "DEBUG: Function %s skipping BB with/after __afl_loop\n",
+ F.getName().str().c_str());
+ skip = 1;
+
+ }
+
+ }
+
+ if (skip) continue;
+
+ }
+
+ // afl++ END
+
+ InjectCoverageAtBlock(F, *AllBlocks[i], i, IsLeafFunc);
+
+ }
+
+ return true;
+
+}
+
+// On every indirect call we call a run-time function
+// __sanitizer_cov_indir_call* with two parameters:
+// - callee address,
+// - global cache array that contains CacheSize pointers (zero-initialized).
+// The cache is used to speed up recording the caller-callee pairs.
+// The address of the caller is passed implicitly via caller PC.
+// CacheSize is encoded in the name of the run-time function.
+void ModuleSanitizerCoverage::InjectCoverageForIndirectCalls(
+ Function &F, ArrayRef<Instruction *> IndirCalls) {
+
+ if (IndirCalls.empty()) return;
+ assert(Options.TracePC || Options.TracePCGuard ||
+ Options.Inline8bitCounters || Options.InlineBoolFlag);
+ for (auto I : IndirCalls) {
+
+ IRBuilder<> IRB(I);
+ CallBase & CB = cast<CallBase>(*I);
+ Value * Callee = CB.getCalledOperand();
+ if (isa<InlineAsm>(Callee)) continue;
+ IRB.CreateCall(SanCovTracePCIndir, IRB.CreatePointerCast(Callee, IntptrTy));
+
+ }
+
+}
+
+void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
+ size_t Idx,
+ bool IsLeafFunc) {
+
+ BasicBlock::iterator IP = BB.getFirstInsertionPt();
+ bool IsEntryBB = &BB == &F.getEntryBlock();
+ DebugLoc EntryLoc;
+ if (IsEntryBB) {
+
+ if (auto SP = F.getSubprogram())
+ EntryLoc = DebugLoc::get(SP->getScopeLine(), 0, SP);
+ // Keep static allocas and llvm.localescape calls in the entry block. Even
+ // if we aren't splitting the block, it's nice for allocas to be before
+ // calls.
+ IP = PrepareToSplitEntryBlock(BB, IP);
+
+ } else {
+
+ EntryLoc = IP->getDebugLoc();
+
+ }
+
+ IRBuilder<> IRB(&*IP);
+ IRB.SetCurrentDebugLocation(EntryLoc);
+ if (Options.TracePC) {
+
+ IRB.CreateCall(SanCovTracePC)
+#if LLVM_VERSION_MAJOR < 12
+ ->cannotMerge(); // gets the PC using GET_CALLER_PC.
+#else
+ ->setCannotMerge(); // gets the PC using GET_CALLER_PC.
+#endif
+
+ }
+
+ if (Options.TracePCGuard) {
+
+ // afl++ START
+ ++afl_global_id;
+
+ if (documentFile) {
+
+ unsigned long long int moduleID =
+ (((unsigned long long int)(rand() & 0xffffffff)) << 32) | getpid();
+ fprintf(documentFile, "ModuleID=%llu Function=%s edgeID=%u\n", moduleID,
+ F.getName().str().c_str(), afl_global_id);
+
+ }
+
+ /* Set the ID of the inserted basic block */
+
+ ConstantInt *CurLoc = ConstantInt::get(Int32Tyi, afl_global_id);
+
+ /* Load SHM pointer */
+
+ Value *MapPtrIdx;
+
+ if (map_addr) {
+
+ MapPtrIdx = IRB.CreateGEP(MapPtrFixed, CurLoc);
+
+ } else {
+
+ LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr);
+ MapPtr->setMetadata(Mo->getMDKindID("nosanitize"),
+ MDNode::get(*Ct, None));
+ MapPtrIdx = IRB.CreateGEP(MapPtr, CurLoc);
+
+ }
+
+ /* Update bitmap */
+
+ LoadInst *Counter = IRB.CreateLoad(MapPtrIdx);
+ Counter->setMetadata(Mo->getMDKindID("nosanitize"), MDNode::get(*Ct, None));
+
+ Value *Incr = IRB.CreateAdd(Counter, One);
+
+ if (skip_nozero == NULL) {
+
+ auto cf = IRB.CreateICmpEQ(Incr, Zero);
+ auto carry = IRB.CreateZExt(cf, Int8Tyi);
+ Incr = IRB.CreateAdd(Incr, carry);
+
+ }
+
+ IRB.CreateStore(Incr, MapPtrIdx)
+ ->setMetadata(Mo->getMDKindID("nosanitize"), MDNode::get(*Ct, None));
+
+ // done :)
+
+ inst++;
+ // afl++ END
+
+ /*
+ XXXXXXXXXXXXXXXXXXX
+
+ auto GuardPtr = IRB.CreateIntToPtr(
+ IRB.CreateAdd(IRB.CreatePointerCast(FunctionGuardArray, IntptrTy),
+ ConstantInt::get(IntptrTy, Idx * 4)),
+ Int32PtrTy);
+
+ IRB.CreateCall(SanCovTracePCGuard, GuardPtr)->setCannotMerge();
+ */
+
+ }
+
+ if (Options.Inline8bitCounters) {
+
+ auto CounterPtr = IRB.CreateGEP(
+ Function8bitCounterArray->getValueType(), Function8bitCounterArray,
+ {ConstantInt::get(IntptrTy, 0), ConstantInt::get(IntptrTy, Idx)});
+ auto Load = IRB.CreateLoad(Int8Ty, CounterPtr);
+ auto Inc = IRB.CreateAdd(Load, ConstantInt::get(Int8Ty, 1));
+ auto Store = IRB.CreateStore(Inc, CounterPtr);
+ SetNoSanitizeMetadata(Load);
+ SetNoSanitizeMetadata(Store);
+
+ }
+
+ if (Options.InlineBoolFlag) {
+
+ auto FlagPtr = IRB.CreateGEP(
+ FunctionBoolArray->getValueType(), FunctionBoolArray,
+ {ConstantInt::get(IntptrTy, 0), ConstantInt::get(IntptrTy, Idx)});
+ auto Load = IRB.CreateLoad(Int1Ty, FlagPtr);
+ auto ThenTerm =
+ SplitBlockAndInsertIfThen(IRB.CreateIsNull(Load), &*IP, false);
+ IRBuilder<> ThenIRB(ThenTerm);
+ auto Store = ThenIRB.CreateStore(ConstantInt::getTrue(Int1Ty), FlagPtr);
+ SetNoSanitizeMetadata(Load);
+ SetNoSanitizeMetadata(Store);
+
+ }
+
+}
+
+std::string ModuleSanitizerCoverage::getSectionName(
+ const std::string &Section) const {
+
+ if (TargetTriple.isOSBinFormatCOFF()) {
+
+ if (Section == SanCovCountersSectionName) return ".SCOV$CM";
+ if (Section == SanCovBoolFlagSectionName) return ".SCOV$BM";
+ if (Section == SanCovPCsSectionName) return ".SCOVP$M";
+ return ".SCOV$GM"; // For SanCovGuardsSectionName.
+
+ }
+
+ if (TargetTriple.isOSBinFormatMachO()) return "__DATA,__" + Section;
+ return "__" + Section;
+
+}
+
+/*
+std::string ModuleSanitizerCoverage::getSectionStart(
+ const std::string &Section) const {
+
+ if (TargetTriple.isOSBinFormatMachO())
+ return "\1section$start$__DATA$__" + Section;
+ return "__start___" + Section;
+
+}
+
+std::string ModuleSanitizerCoverage::getSectionEnd(
+ const std::string &Section) const {
+
+ if (TargetTriple.isOSBinFormatMachO())
+ return "\1section$end$__DATA$__" + Section;
+ return "__stop___" + Section;
+
+}
+
+*/
+
+char ModuleSanitizerCoverageLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(ModuleSanitizerCoverageLegacyPass, "sancov",
+ "Pass for instrumenting coverage on functions", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
+INITIALIZE_PASS_END(ModuleSanitizerCoverageLegacyPass, "sancov",
+ "Pass for instrumenting coverage on functions", false,
+ false)
+
+ModulePass *llvm::createModuleSanitizerCoverageLegacyPassPass(
+ const SanitizerCoverageOptions &Options,
+ const std::vector<std::string> &AllowlistFiles,
+ const std::vector<std::string> &BlocklistFiles) {
+
+ return new ModuleSanitizerCoverageLegacyPass(Options);
+ //, AllowlistFiles, BlocklistFiles);
+
+}
+
+static void registerLTOPass(const PassManagerBuilder &,
+ legacy::PassManagerBase &PM) {
+
+ auto p = new ModuleSanitizerCoverageLegacyPass();
+ PM.add(p);
+
+}
+
+static RegisterStandardPasses RegisterCompTransPass(
+ PassManagerBuilder::EP_OptimizerLast, registerLTOPass);
+
+static RegisterStandardPasses RegisterCompTransPass0(
+ PassManagerBuilder::EP_EnabledOnOptLevel0, registerLTOPass);
+
+#if LLVM_VERSION_MAJOR >= 11
+static RegisterStandardPasses RegisterCompTransPassLTO(
+ PassManagerBuilder::EP_FullLinkTimeOptimizationLast, registerLTOPass);
+#endif
+
diff --git a/instrumentation/SanitizerCoveragePCGUARD.so.cc b/instrumentation/SanitizerCoveragePCGUARD.so.cc
new file mode 100644
index 00000000..b3c55108
--- /dev/null
+++ b/instrumentation/SanitizerCoveragePCGUARD.so.cc
@@ -0,0 +1,1349 @@
+//===-- SanitizerCoverage.cpp - coverage instrumentation for sanitizers ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coverage instrumentation done on LLVM IR level, works with Sanitizers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SpecialCaseList.h"
+#if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0)
+ #include "llvm/Support/VirtualFileSystem.h"
+#endif
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+#include "config.h"
+#include "debug.h"
+#include "afl-llvm-common.h"
+
+namespace llvm {
+
+/// This is the ModuleSanitizerCoverage pass used in the new pass manager. The
+/// pass instruments functions for coverage, adds initialization calls to the
+/// module for trace PC guards and 8bit counters if they are requested, and
+/// appends globals to llvm.compiler.used.
+class ModuleSanitizerCoveragePass
+ : public PassInfoMixin<ModuleSanitizerCoveragePass> {
+
+ public:
+ explicit ModuleSanitizerCoveragePass(
+ SanitizerCoverageOptions Options = SanitizerCoverageOptions(),
+ const std::vector<std::string> &AllowlistFiles =
+ std::vector<std::string>(),
+ const std::vector<std::string> &BlocklistFiles =
+ std::vector<std::string>())
+ : Options(Options) {
+
+ if (AllowlistFiles.size() > 0)
+ Allowlist = SpecialCaseList::createOrDie(AllowlistFiles
+#if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0)
+ ,
+ *vfs::getRealFileSystem()
+#endif
+ );
+ if (BlocklistFiles.size() > 0)
+ Blocklist = SpecialCaseList::createOrDie(BlocklistFiles
+#if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0)
+ ,
+ *vfs::getRealFileSystem()
+#endif
+ );
+
+ }
+
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+ static bool isRequired() {
+
+ return true;
+
+ }
+
+ private:
+ SanitizerCoverageOptions Options;
+
+ std::unique_ptr<SpecialCaseList> Allowlist;
+ std::unique_ptr<SpecialCaseList> Blocklist;
+
+};
+
+// Insert SanitizerCoverage instrumentation.
+ModulePass *createModuleSanitizerCoverageLegacyPassPass(
+ const SanitizerCoverageOptions &Options = SanitizerCoverageOptions(),
+ const std::vector<std::string> &AllowlistFiles = std::vector<std::string>(),
+ const std::vector<std::string> &BlocklistFiles =
+ std::vector<std::string>());
+
+} // namespace llvm
+
+using namespace llvm;
+
+#define DEBUG_TYPE "sancov"
+
+static const char *const SanCovTracePCIndirName =
+ "__sanitizer_cov_trace_pc_indir";
+static const char *const SanCovTracePCName = "__sanitizer_cov_trace_pc";
+static const char *const SanCovTraceCmp1 = "__sanitizer_cov_trace_cmp1";
+static const char *const SanCovTraceCmp2 = "__sanitizer_cov_trace_cmp2";
+static const char *const SanCovTraceCmp4 = "__sanitizer_cov_trace_cmp4";
+static const char *const SanCovTraceCmp8 = "__sanitizer_cov_trace_cmp8";
+static const char *const SanCovTraceConstCmp1 =
+ "__sanitizer_cov_trace_const_cmp1";
+static const char *const SanCovTraceConstCmp2 =
+ "__sanitizer_cov_trace_const_cmp2";
+static const char *const SanCovTraceConstCmp4 =
+ "__sanitizer_cov_trace_const_cmp4";
+static const char *const SanCovTraceConstCmp8 =
+ "__sanitizer_cov_trace_const_cmp8";
+static const char *const SanCovTraceDiv4 = "__sanitizer_cov_trace_div4";
+static const char *const SanCovTraceDiv8 = "__sanitizer_cov_trace_div8";
+static const char *const SanCovTraceGep = "__sanitizer_cov_trace_gep";
+static const char *const SanCovTraceSwitchName = "__sanitizer_cov_trace_switch";
+static const char *const SanCovModuleCtorTracePcGuardName =
+ "sancov.module_ctor_trace_pc_guard";
+static const char *const SanCovModuleCtor8bitCountersName =
+ "sancov.module_ctor_8bit_counters";
+static const char *const SanCovModuleCtorBoolFlagName =
+ "sancov.module_ctor_bool_flag";
+static const uint64_t SanCtorAndDtorPriority = 2;
+
+static const char *const SanCovTracePCGuardName =
+ "__sanitizer_cov_trace_pc_guard";
+static const char *const SanCovTracePCGuardInitName =
+ "__sanitizer_cov_trace_pc_guard_init";
+static const char *const SanCov8bitCountersInitName =
+ "__sanitizer_cov_8bit_counters_init";
+static const char *const SanCovBoolFlagInitName =
+ "__sanitizer_cov_bool_flag_init";
+static const char *const SanCovPCsInitName = "__sanitizer_cov_pcs_init";
+
+static const char *const SanCovGuardsSectionName = "sancov_guards";
+static const char *const SanCovCountersSectionName = "sancov_cntrs";
+static const char *const SanCovBoolFlagSectionName = "sancov_bools";
+static const char *const SanCovPCsSectionName = "sancov_pcs";
+
+static const char *const SanCovLowestStackName = "__sancov_lowest_stack";
+
+static char *skip_nozero;
+
+/*
+static cl::opt<int> ClCoverageLevel(
+ "sanitizer-coverage-level",
+ cl::desc("Sanitizer Coverage. 0: none, 1: entry block, 2: all blocks, "
+ "3: all blocks and critical edges"),
+ cl::Hidden, cl::init(3));
+
+static cl::opt<bool> ClTracePC("sanitizer-coverage-trace-pc",
+ cl::desc("Experimental pc tracing"), cl::Hidden,
+ cl::init(false));
+
+static cl::opt<bool> ClTracePCGuard("sanitizer-coverage-trace-pc-guard",
+ cl::desc("pc tracing with a guard"),
+ cl::Hidden, cl::init(true));
+
+// If true, we create a global variable that contains PCs of all instrumented
+// BBs, put this global into a named section, and pass this section's bounds
+// to __sanitizer_cov_pcs_init.
+// This way the coverage instrumentation does not need to acquire the PCs
+// at run-time. Works with trace-pc-guard, inline-8bit-counters, and
+// inline-bool-flag.
+static cl::opt<bool> ClCreatePCTable("sanitizer-coverage-pc-table",
+ cl::desc("create a static PC table"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClInline8bitCounters(
+ "sanitizer-coverage-inline-8bit-counters",
+ cl::desc("increments 8-bit counter for every edge"), cl::Hidden,
+ cl::init(false));
+
+static cl::opt<bool> ClInlineBoolFlag(
+ "sanitizer-coverage-inline-bool-flag",
+ cl::desc("sets a boolean flag for every edge"), cl::Hidden,
+ cl::init(false));
+
+static cl::opt<bool> ClCMPTracing(
+ "sanitizer-coverage-trace-compares",
+ cl::desc("Tracing of CMP and similar instructions"), cl::Hidden,
+ cl::init(false));
+
+static cl::opt<bool> ClDIVTracing("sanitizer-coverage-trace-divs",
+ cl::desc("Tracing of DIV instructions"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClGEPTracing("sanitizer-coverage-trace-geps",
+ cl::desc("Tracing of GEP instructions"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClPruneBlocks(
+ "sanitizer-coverage-prune-blocks",
+ cl::desc("Reduce the number of instrumented blocks"), cl::Hidden,
+ cl::init(true));
+
+static cl::opt<bool> ClStackDepth("sanitizer-coverage-stack-depth",
+ cl::desc("max stack depth tracing"),
+ cl::Hidden, cl::init(false));
+*/
+namespace {
+
+/*
+SanitizerCoverageOptions getOptions(int LegacyCoverageLevel) {
+
+ SanitizerCoverageOptions Res;
+ switch (LegacyCoverageLevel) {
+
+ case 0:
+ Res.CoverageType = SanitizerCoverageOptions::SCK_None;
+ break;
+ case 1:
+ Res.CoverageType = SanitizerCoverageOptions::SCK_Function;
+ break;
+ case 2:
+ Res.CoverageType = SanitizerCoverageOptions::SCK_BB;
+ break;
+ case 3:
+ Res.CoverageType = SanitizerCoverageOptions::SCK_Edge;
+ break;
+ case 4:
+ Res.CoverageType = SanitizerCoverageOptions::SCK_Edge;
+ Res.IndirectCalls = true;
+ break;
+
+ }
+
+ return Res;
+
+}
+
+*/
+
+SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) {
+
+ // Sets CoverageType and IndirectCalls.
+ // SanitizerCoverageOptions CLOpts = getOptions(ClCoverageLevel);
+ Options.CoverageType =
+ SanitizerCoverageOptions::SCK_Edge; // std::max(Options.CoverageType,
+ // CLOpts.CoverageType);
+ Options.IndirectCalls = false; // CLOpts.IndirectCalls;
+ Options.TraceCmp = false; //|= ClCMPTracing;
+ Options.TraceDiv = false; //|= ClDIVTracing;
+ Options.TraceGep = false; //|= ClGEPTracing;
+ Options.TracePC = false; //|= ClTracePC;
+ Options.TracePCGuard = true; // |= ClTracePCGuard;
+ Options.Inline8bitCounters = 0; //|= ClInline8bitCounters;
+ // Options.InlineBoolFlag = 0; //|= ClInlineBoolFlag;
+ Options.PCTable = false; //|= ClCreatePCTable;
+ Options.NoPrune = false; //|= !ClPruneBlocks;
+ Options.StackDepth = false; //|= ClStackDepth;
+ if (!Options.TracePCGuard && !Options.TracePC &&
+ !Options.Inline8bitCounters && !Options.StackDepth /*&&
+ !Options.InlineBoolFlag*/)
+ Options.TracePCGuard = true; // TracePCGuard is default.
+
+ return Options;
+
+}
+
+using DomTreeCallback = function_ref<const DominatorTree *(Function &F)>;
+using PostDomTreeCallback =
+ function_ref<const PostDominatorTree *(Function &F)>;
+
+class ModuleSanitizerCoverage {
+
+ public:
+ ModuleSanitizerCoverage(
+ const SanitizerCoverageOptions &Options = SanitizerCoverageOptions(),
+ const SpecialCaseList * Allowlist = nullptr,
+ const SpecialCaseList * Blocklist = nullptr)
+ : Options(OverrideFromCL(Options)),
+ Allowlist(Allowlist),
+ Blocklist(Blocklist) {
+
+ }
+
+ bool instrumentModule(Module &M, DomTreeCallback DTCallback,
+ PostDomTreeCallback PDTCallback);
+
+ private:
+ void instrumentFunction(Function &F, DomTreeCallback DTCallback,
+ PostDomTreeCallback PDTCallback);
+ void InjectCoverageForIndirectCalls(Function & F,
+ ArrayRef<Instruction *> IndirCalls);
+ void InjectTraceForCmp(Function &F, ArrayRef<Instruction *> CmpTraceTargets);
+ void InjectTraceForDiv(Function & F,
+ ArrayRef<BinaryOperator *> DivTraceTargets);
+ void InjectTraceForGep(Function & F,
+ ArrayRef<GetElementPtrInst *> GepTraceTargets);
+ void InjectTraceForSwitch(Function & F,
+ ArrayRef<Instruction *> SwitchTraceTargets);
+ bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks,
+ bool IsLeafFunc = true);
+ GlobalVariable *CreateFunctionLocalArrayInSection(size_t NumElements,
+ Function &F, Type *Ty,
+ const char *Section);
+ GlobalVariable *CreatePCArray(Function &F, ArrayRef<BasicBlock *> AllBlocks);
+ void CreateFunctionLocalArrays(Function &F, ArrayRef<BasicBlock *> AllBlocks);
+ void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx,
+ bool IsLeafFunc = true);
+ Function *CreateInitCallsForSections(Module &M, const char *CtorName,
+ const char *InitFunctionName, Type *Ty,
+ const char *Section);
+ std::pair<Value *, Value *> CreateSecStartEnd(Module &M, const char *Section,
+ Type *Ty);
+
+ void SetNoSanitizeMetadata(Instruction *I) {
+
+ I->setMetadata(I->getModule()->getMDKindID("nosanitize"),
+ MDNode::get(*C, None));
+
+ }
+
+ std::string getSectionName(const std::string &Section) const;
+ std::string getSectionStart(const std::string &Section) const;
+ std::string getSectionEnd(const std::string &Section) const;
+ FunctionCallee SanCovTracePCIndir;
+ FunctionCallee SanCovTracePC, SanCovTracePCGuard;
+ FunctionCallee SanCovTraceCmpFunction[4];
+ FunctionCallee SanCovTraceConstCmpFunction[4];
+ FunctionCallee SanCovTraceDivFunction[2];
+ FunctionCallee SanCovTraceGepFunction;
+ FunctionCallee SanCovTraceSwitchFunction;
+ GlobalVariable *SanCovLowestStack;
+ Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy,
+ *Int16Ty, *Int8Ty, *Int8PtrTy, *Int1Ty, *Int1PtrTy;
+ Module * CurModule;
+ std::string CurModuleUniqueId;
+ Triple TargetTriple;
+ LLVMContext * C;
+ const DataLayout *DL;
+
+ GlobalVariable *FunctionGuardArray; // for trace-pc-guard.
+ GlobalVariable *Function8bitCounterArray; // for inline-8bit-counters.
+ GlobalVariable *FunctionBoolArray; // for inline-bool-flag.
+ GlobalVariable *FunctionPCsArray; // for pc-table.
+ SmallVector<GlobalValue *, 20> GlobalsToAppendToUsed;
+ SmallVector<GlobalValue *, 20> GlobalsToAppendToCompilerUsed;
+
+ SanitizerCoverageOptions Options;
+
+ const SpecialCaseList *Allowlist;
+ const SpecialCaseList *Blocklist;
+
+ uint32_t instr = 0;
+ GlobalVariable *AFLMapPtr = NULL;
+ ConstantInt * One = NULL;
+ ConstantInt * Zero = NULL;
+
+};
+
+class ModuleSanitizerCoverageLegacyPass : public ModulePass {
+
+ public:
+ ModuleSanitizerCoverageLegacyPass(
+ const SanitizerCoverageOptions &Options = SanitizerCoverageOptions(),
+ const std::vector<std::string> &AllowlistFiles =
+ std::vector<std::string>(),
+ const std::vector<std::string> &BlocklistFiles =
+ std::vector<std::string>())
+ : ModulePass(ID), Options(Options) {
+
+ if (AllowlistFiles.size() > 0)
+ Allowlist = SpecialCaseList::createOrDie(AllowlistFiles
+#if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0)
+ ,
+ *vfs::getRealFileSystem()
+#endif
+ );
+ if (BlocklistFiles.size() > 0)
+ Blocklist = SpecialCaseList::createOrDie(BlocklistFiles
+#if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0)
+ ,
+ *vfs::getRealFileSystem()
+#endif
+ );
+ initializeModuleSanitizerCoverageLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+
+ }
+
+ bool runOnModule(Module &M) override {
+
+ ModuleSanitizerCoverage ModuleSancov(Options, Allowlist.get(),
+ Blocklist.get());
+ auto DTCallback = [this](Function &F) -> const DominatorTree * {
+
+ return &this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
+
+ };
+
+ auto PDTCallback = [this](Function &F) -> const PostDominatorTree * {
+
+ return &this->getAnalysis<PostDominatorTreeWrapperPass>(F)
+ .getPostDomTree();
+
+ };
+
+ return ModuleSancov.instrumentModule(M, DTCallback, PDTCallback);
+
+ }
+
+ static char ID; // Pass identification, replacement for typeid
+ StringRef getPassName() const override {
+
+ return "ModuleSanitizerCoverage";
+
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<PostDominatorTreeWrapperPass>();
+
+ }
+
+ private:
+ SanitizerCoverageOptions Options;
+
+ std::unique_ptr<SpecialCaseList> Allowlist;
+ std::unique_ptr<SpecialCaseList> Blocklist;
+
+};
+
+} // namespace
+
+PreservedAnalyses ModuleSanitizerCoveragePass::run(Module & M,
+ ModuleAnalysisManager &MAM) {
+
+ ModuleSanitizerCoverage ModuleSancov(Options, Allowlist.get(),
+ Blocklist.get());
+ auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto DTCallback = [&FAM](Function &F) -> const DominatorTree * {
+
+ return &FAM.getResult<DominatorTreeAnalysis>(F);
+
+ };
+
+ auto PDTCallback = [&FAM](Function &F) -> const PostDominatorTree * {
+
+ return &FAM.getResult<PostDominatorTreeAnalysis>(F);
+
+ };
+
+ if (ModuleSancov.instrumentModule(M, DTCallback, PDTCallback))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+
+}
+
+std::pair<Value *, Value *> ModuleSanitizerCoverage::CreateSecStartEnd(
+ Module &M, const char *Section, Type *Ty) {
+
+ GlobalVariable *SecStart = new GlobalVariable(
+ M, Ty->getPointerElementType(), false, GlobalVariable::ExternalLinkage,
+ nullptr, getSectionStart(Section));
+ SecStart->setVisibility(GlobalValue::HiddenVisibility);
+ GlobalVariable *SecEnd = new GlobalVariable(
+ M, Ty->getPointerElementType(), false, GlobalVariable::ExternalLinkage,
+ nullptr, getSectionEnd(Section));
+ SecEnd->setVisibility(GlobalValue::HiddenVisibility);
+ IRBuilder<> IRB(M.getContext());
+ if (!TargetTriple.isOSBinFormatCOFF())
+ return std::make_pair(SecStart, SecEnd);
+
+ // Account for the fact that on windows-msvc __start_* symbols actually
+ // point to a uint64_t before the start of the array.
+ auto SecStartI8Ptr = IRB.CreatePointerCast(SecStart, Int8PtrTy);
+ auto GEP = IRB.CreateGEP(Int8Ty, SecStartI8Ptr,
+ ConstantInt::get(IntptrTy, sizeof(uint64_t)));
+ return std::make_pair(IRB.CreatePointerCast(GEP, Ty), SecEnd);
+
+}
+
+Function *ModuleSanitizerCoverage::CreateInitCallsForSections(
+ Module &M, const char *CtorName, const char *InitFunctionName, Type *Ty,
+ const char *Section) {
+
+ auto SecStartEnd = CreateSecStartEnd(M, Section, Ty);
+ auto SecStart = SecStartEnd.first;
+ auto SecEnd = SecStartEnd.second;
+ Function *CtorFunc;
+ std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions(
+ M, CtorName, InitFunctionName, {Ty, Ty}, {SecStart, SecEnd});
+ assert(CtorFunc->getName() == CtorName);
+
+ if (TargetTriple.supportsCOMDAT()) {
+
+ // Use comdat to dedup CtorFunc.
+ CtorFunc->setComdat(M.getOrInsertComdat(CtorName));
+ appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority, CtorFunc);
+
+ } else {
+
+ appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority);
+
+ }
+
+ if (TargetTriple.isOSBinFormatCOFF()) {
+
+    // In COFF files, if the constructors are set as COMDAT (they are because
+ // COFF supports COMDAT) and the linker flag /OPT:REF (strip unreferenced
+ // functions and data) is used, the constructors get stripped. To prevent
+ // this, give the constructors weak ODR linkage and ensure the linker knows
+ // to include the sancov constructor. This way the linker can deduplicate
+ // the constructors but always leave one copy.
+ CtorFunc->setLinkage(GlobalValue::WeakODRLinkage);
+ appendToUsed(M, CtorFunc);
+
+ }
+
+ return CtorFunc;
+
+}
+
+bool ModuleSanitizerCoverage::instrumentModule(
+ Module &M, DomTreeCallback DTCallback, PostDomTreeCallback PDTCallback) {
+
+ setvbuf(stdout, NULL, _IONBF, 0);
+ if (getenv("AFL_DEBUG")) debug = 1;
+
+ if ((isatty(2) && !getenv("AFL_QUIET")) || debug) {
+
+ SAYF(cCYA "SanitizerCoveragePCGUARD" VERSION cRST "\n");
+
+ } else
+
+ be_quiet = 1;
+
+ skip_nozero = getenv("AFL_LLVM_SKIP_NEVERZERO");
+ // scanForDangerousFunctions(&M);
+
+ if (debug) {
+
+ fprintf(stderr,
+ "SANCOV: covtype:%u indirect:%d stack:%d noprune:%d "
+ "createtable:%d tracepcguard:%d tracepc:%d\n",
+ Options.CoverageType, Options.IndirectCalls == true ? 1 : 0,
+ Options.StackDepth == true ? 1 : 0, Options.NoPrune == true ? 1 : 0,
+ // Options.InlineBoolFlag == true ? 1 : 0,
+ Options.PCTable == true ? 1 : 0,
+ Options.TracePCGuard == true ? 1 : 0,
+ Options.TracePC == true ? 1 : 0);
+
+ }
+
+ if (Options.CoverageType == SanitizerCoverageOptions::SCK_None) return false;
+ if (Allowlist &&
+ !Allowlist->inSection("coverage", "src", M.getSourceFileName()))
+ return false;
+ if (Blocklist &&
+ Blocklist->inSection("coverage", "src", M.getSourceFileName()))
+ return false;
+ C = &(M.getContext());
+ DL = &M.getDataLayout();
+ CurModule = &M;
+ CurModuleUniqueId = getUniqueModuleId(CurModule);
+ TargetTriple = Triple(M.getTargetTriple());
+ FunctionGuardArray = nullptr;
+ Function8bitCounterArray = nullptr;
+ FunctionBoolArray = nullptr;
+ FunctionPCsArray = nullptr;
+ IntptrTy = Type::getIntNTy(*C, DL->getPointerSizeInBits());
+ IntptrPtrTy = PointerType::getUnqual(IntptrTy);
+ Type * VoidTy = Type::getVoidTy(*C);
+ IRBuilder<> IRB(*C);
+ Int64PtrTy = PointerType::getUnqual(IRB.getInt64Ty());
+ Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
+ Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty());
+ Int1PtrTy = PointerType::getUnqual(IRB.getInt1Ty());
+ Int64Ty = IRB.getInt64Ty();
+ Int32Ty = IRB.getInt32Ty();
+ Int16Ty = IRB.getInt16Ty();
+ Int8Ty = IRB.getInt8Ty();
+ Int1Ty = IRB.getInt1Ty();
+ LLVMContext &Ctx = M.getContext();
+
+ AFLMapPtr =
+ new GlobalVariable(M, PointerType::get(Int8Ty, 0), false,
+ GlobalValue::ExternalLinkage, 0, "__afl_area_ptr");
+ One = ConstantInt::get(IntegerType::getInt8Ty(Ctx), 1);
+ Zero = ConstantInt::get(IntegerType::getInt8Ty(Ctx), 0);
+
+ SanCovTracePCIndir =
+ M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy);
+ // Make sure smaller parameters are zero-extended to i64 if required by the
+ // target ABI.
+ AttributeList SanCovTraceCmpZeroExtAL;
+ SanCovTraceCmpZeroExtAL =
+ SanCovTraceCmpZeroExtAL.addParamAttribute(*C, 0, Attribute::ZExt);
+ SanCovTraceCmpZeroExtAL =
+ SanCovTraceCmpZeroExtAL.addParamAttribute(*C, 1, Attribute::ZExt);
+
+ SanCovTraceCmpFunction[0] =
+ M.getOrInsertFunction(SanCovTraceCmp1, SanCovTraceCmpZeroExtAL, VoidTy,
+ IRB.getInt8Ty(), IRB.getInt8Ty());
+ SanCovTraceCmpFunction[1] =
+ M.getOrInsertFunction(SanCovTraceCmp2, SanCovTraceCmpZeroExtAL, VoidTy,
+ IRB.getInt16Ty(), IRB.getInt16Ty());
+ SanCovTraceCmpFunction[2] =
+ M.getOrInsertFunction(SanCovTraceCmp4, SanCovTraceCmpZeroExtAL, VoidTy,
+ IRB.getInt32Ty(), IRB.getInt32Ty());
+ SanCovTraceCmpFunction[3] =
+ M.getOrInsertFunction(SanCovTraceCmp8, VoidTy, Int64Ty, Int64Ty);
+
+ SanCovTraceConstCmpFunction[0] = M.getOrInsertFunction(
+ SanCovTraceConstCmp1, SanCovTraceCmpZeroExtAL, VoidTy, Int8Ty, Int8Ty);
+ SanCovTraceConstCmpFunction[1] = M.getOrInsertFunction(
+ SanCovTraceConstCmp2, SanCovTraceCmpZeroExtAL, VoidTy, Int16Ty, Int16Ty);
+ SanCovTraceConstCmpFunction[2] = M.getOrInsertFunction(
+ SanCovTraceConstCmp4, SanCovTraceCmpZeroExtAL, VoidTy, Int32Ty, Int32Ty);
+ SanCovTraceConstCmpFunction[3] =
+ M.getOrInsertFunction(SanCovTraceConstCmp8, VoidTy, Int64Ty, Int64Ty);
+
+ {
+
+ AttributeList AL;
+ AL = AL.addParamAttribute(*C, 0, Attribute::ZExt);
+ SanCovTraceDivFunction[0] =
+ M.getOrInsertFunction(SanCovTraceDiv4, AL, VoidTy, IRB.getInt32Ty());
+
+ }
+
+ SanCovTraceDivFunction[1] =
+ M.getOrInsertFunction(SanCovTraceDiv8, VoidTy, Int64Ty);
+ SanCovTraceGepFunction =
+ M.getOrInsertFunction(SanCovTraceGep, VoidTy, IntptrTy);
+ SanCovTraceSwitchFunction =
+ M.getOrInsertFunction(SanCovTraceSwitchName, VoidTy, Int64Ty, Int64PtrTy);
+
+ Constant *SanCovLowestStackConstant =
+ M.getOrInsertGlobal(SanCovLowestStackName, IntptrTy);
+ SanCovLowestStack = dyn_cast<GlobalVariable>(SanCovLowestStackConstant);
+ if (!SanCovLowestStack) {
+
+ C->emitError(StringRef("'") + SanCovLowestStackName +
+ "' should not be declared by the user");
+ return true;
+
+ }
+
+ SanCovLowestStack->setThreadLocalMode(
+ GlobalValue::ThreadLocalMode::InitialExecTLSModel);
+ if (Options.StackDepth && !SanCovLowestStack->isDeclaration())
+ SanCovLowestStack->setInitializer(Constant::getAllOnesValue(IntptrTy));
+
+ SanCovTracePC = M.getOrInsertFunction(SanCovTracePCName, VoidTy);
+ SanCovTracePCGuard =
+ M.getOrInsertFunction(SanCovTracePCGuardName, VoidTy, Int32PtrTy);
+
+ for (auto &F : M)
+ instrumentFunction(F, DTCallback, PDTCallback);
+
+ Function *Ctor = nullptr;
+
+ if (FunctionGuardArray)
+ Ctor = CreateInitCallsForSections(M, SanCovModuleCtorTracePcGuardName,
+ SanCovTracePCGuardInitName, Int32PtrTy,
+ SanCovGuardsSectionName);
+ if (Function8bitCounterArray)
+ Ctor = CreateInitCallsForSections(M, SanCovModuleCtor8bitCountersName,
+ SanCov8bitCountersInitName, Int8PtrTy,
+ SanCovCountersSectionName);
+ if (FunctionBoolArray) {
+
+ Ctor = CreateInitCallsForSections(M, SanCovModuleCtorBoolFlagName,
+ SanCovBoolFlagInitName, Int1PtrTy,
+ SanCovBoolFlagSectionName);
+
+ }
+
+ if (Ctor && Options.PCTable) {
+
+ auto SecStartEnd = CreateSecStartEnd(M, SanCovPCsSectionName, IntptrPtrTy);
+ FunctionCallee InitFunction = declareSanitizerInitFunction(
+ M, SanCovPCsInitName, {IntptrPtrTy, IntptrPtrTy});
+ IRBuilder<> IRBCtor(Ctor->getEntryBlock().getTerminator());
+ IRBCtor.CreateCall(InitFunction, {SecStartEnd.first, SecStartEnd.second});
+
+ }
+
+ // We don't reference these arrays directly in any of our runtime functions,
+ // so we need to prevent them from being dead stripped.
+ if (TargetTriple.isOSBinFormatMachO()) appendToUsed(M, GlobalsToAppendToUsed);
+ appendToCompilerUsed(M, GlobalsToAppendToCompilerUsed);
+
+ if (!be_quiet) {
+
+ if (!instr)
+ WARNF("No instrumentation targets found.");
+ else {
+
+ char modeline[100];
+ snprintf(modeline, sizeof(modeline), "%s%s%s%s%s",
+ getenv("AFL_HARDEN") ? "hardened" : "non-hardened",
+ getenv("AFL_USE_ASAN") ? ", ASAN" : "",
+ getenv("AFL_USE_MSAN") ? ", MSAN" : "",
+ getenv("AFL_USE_CFISAN") ? ", CFISAN" : "",
+ getenv("AFL_USE_UBSAN") ? ", UBSAN" : "");
+ OKF("Instrumented %u locations with no collisions (%s mode).", instr,
+ modeline);
+
+ }
+
+ }
+
+ return true;
+
+}
+
+// True if block has successors and it dominates all of them.
+static bool isFullDominator(const BasicBlock *BB, const DominatorTree *DT) {
+
+ if (succ_begin(BB) == succ_end(BB)) return false;
+
+ for (const BasicBlock *SUCC : make_range(succ_begin(BB), succ_end(BB))) {
+
+ if (!DT->dominates(BB, SUCC)) return false;
+
+ }
+
+ return true;
+
+}
+
+// True if block has predecessors and it postdominates all of them.
+static bool isFullPostDominator(const BasicBlock * BB,
+ const PostDominatorTree *PDT) {
+
+ if (pred_begin(BB) == pred_end(BB)) return false;
+
+ for (const BasicBlock *PRED : make_range(pred_begin(BB), pred_end(BB))) {
+
+ if (!PDT->dominates(BB, PRED)) return false;
+
+ }
+
+ return true;
+
+}
+
+static bool shouldInstrumentBlock(const Function &F, const BasicBlock *BB,
+ const DominatorTree * DT,
+ const PostDominatorTree * PDT,
+ const SanitizerCoverageOptions &Options) {
+
+ // Don't insert coverage for blocks containing nothing but unreachable: we
+ // will never call __sanitizer_cov() for them, so counting them in
+ // NumberOfInstrumentedBlocks() might complicate calculation of code coverage
+ // percentage. Also, unreachable instructions frequently have no debug
+ // locations.
+ if (isa<UnreachableInst>(BB->getFirstNonPHIOrDbgOrLifetime())) return false;
+
+ // Don't insert coverage into blocks without a valid insertion point
+ // (catchswitch blocks).
+ if (BB->getFirstInsertionPt() == BB->end()) return false;
+
+ if (Options.NoPrune || &F.getEntryBlock() == BB) return true;
+
+ if (Options.CoverageType == SanitizerCoverageOptions::SCK_Function &&
+ &F.getEntryBlock() != BB)
+ return false;
+
+ // Do not instrument full dominators, or full post-dominators with multiple
+ // predecessors.
+ return !isFullDominator(BB, DT) &&
+ !(isFullPostDominator(BB, PDT) && !BB->getSinglePredecessor());
+
+}
+
+// Returns true iff From->To is a backedge.
+// A twist here is that we treat From->To as a backedge if
+// * To dominates From or
+// * To->UniqueSuccessor dominates From
+static bool IsBackEdge(BasicBlock *From, BasicBlock *To,
+ const DominatorTree *DT) {
+
+ if (DT->dominates(To, From)) return true;
+ if (auto Next = To->getUniqueSuccessor())
+ if (DT->dominates(Next, From)) return true;
+ return false;
+
+}
+
+// Prunes uninteresting Cmp instrumentation:
+// * CMP instructions that feed into loop backedge branch.
+//
+// Note that Cmp pruning is controlled by the same flag as the
+// BB pruning.
+static bool IsInterestingCmp(ICmpInst *CMP, const DominatorTree *DT,
+ const SanitizerCoverageOptions &Options) {
+
+ if (!Options.NoPrune)
+ if (CMP->hasOneUse())
+ if (auto BR = dyn_cast<BranchInst>(CMP->user_back()))
+ for (BasicBlock *B : BR->successors())
+ if (IsBackEdge(BR->getParent(), B, DT)) return false;
+ return true;
+
+}
+
+void ModuleSanitizerCoverage::instrumentFunction(
+ Function &F, DomTreeCallback DTCallback, PostDomTreeCallback PDTCallback) {
+
+ if (F.empty()) return;
+ if (F.getName().find(".module_ctor") != std::string::npos)
+ return; // Should not instrument sanitizer init functions.
+ if (F.getName().startswith("__sanitizer_"))
+ return; // Don't instrument __sanitizer_* callbacks.
+  // Don't touch available_externally functions, their actual body is elsewhere.
+ if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return;
+ // Don't instrument MSVC CRT configuration helpers. They may run before normal
+ // initialization.
+ if (F.getName() == "__local_stdio_printf_options" ||
+ F.getName() == "__local_stdio_scanf_options")
+ return;
+ if (isa<UnreachableInst>(F.getEntryBlock().getTerminator())) return;
+ // Don't instrument functions using SEH for now. Splitting basic blocks like
+ // we do for coverage breaks WinEHPrepare.
+ // FIXME: Remove this when SEH no longer uses landingpad pattern matching.
+ if (F.hasPersonalityFn() &&
+ isAsynchronousEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
+ return;
+ if (Allowlist && !Allowlist->inSection("coverage", "fun", F.getName()))
+ return;
+ if (Blocklist && Blocklist->inSection("coverage", "fun", F.getName())) return;
+ if (Options.CoverageType >= SanitizerCoverageOptions::SCK_Edge)
+ SplitAllCriticalEdges(
+ F, CriticalEdgeSplittingOptions().setIgnoreUnreachableDests());
+ SmallVector<Instruction *, 8> IndirCalls;
+ SmallVector<BasicBlock *, 16> BlocksToInstrument;
+ SmallVector<Instruction *, 8> CmpTraceTargets;
+ SmallVector<Instruction *, 8> SwitchTraceTargets;
+ SmallVector<BinaryOperator *, 8> DivTraceTargets;
+ SmallVector<GetElementPtrInst *, 8> GepTraceTargets;
+
+ const DominatorTree * DT = DTCallback(F);
+ const PostDominatorTree *PDT = PDTCallback(F);
+ bool IsLeafFunc = true;
+
+ for (auto &BB : F) {
+
+ if (shouldInstrumentBlock(F, &BB, DT, PDT, Options))
+ BlocksToInstrument.push_back(&BB);
+ for (auto &Inst : BB) {
+
+ if (Options.IndirectCalls) {
+
+ CallBase *CB = dyn_cast<CallBase>(&Inst);
+ if (CB && !CB->getCalledFunction()) IndirCalls.push_back(&Inst);
+
+ }
+
+ if (Options.TraceCmp) {
+
+ if (ICmpInst *CMP = dyn_cast<ICmpInst>(&Inst))
+ if (IsInterestingCmp(CMP, DT, Options))
+ CmpTraceTargets.push_back(&Inst);
+ if (isa<SwitchInst>(&Inst)) SwitchTraceTargets.push_back(&Inst);
+
+ }
+
+ if (Options.TraceDiv)
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&Inst))
+ if (BO->getOpcode() == Instruction::SDiv ||
+ BO->getOpcode() == Instruction::UDiv)
+ DivTraceTargets.push_back(BO);
+ if (Options.TraceGep)
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Inst))
+ GepTraceTargets.push_back(GEP);
+ if (Options.StackDepth)
+ if (isa<InvokeInst>(Inst) ||
+ (isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst)))
+ IsLeafFunc = false;
+
+ }
+
+ }
+
+ InjectCoverage(F, BlocksToInstrument, IsLeafFunc);
+ InjectCoverageForIndirectCalls(F, IndirCalls);
+ InjectTraceForCmp(F, CmpTraceTargets);
+ InjectTraceForSwitch(F, SwitchTraceTargets);
+ InjectTraceForDiv(F, DivTraceTargets);
+ InjectTraceForGep(F, GepTraceTargets);
+
+}
+
+GlobalVariable *ModuleSanitizerCoverage::CreateFunctionLocalArrayInSection(
+ size_t NumElements, Function &F, Type *Ty, const char *Section) {
+
+ ArrayType *ArrayTy = ArrayType::get(Ty, NumElements);
+ auto Array = new GlobalVariable(
+ *CurModule, ArrayTy, false, GlobalVariable::PrivateLinkage,
+ Constant::getNullValue(ArrayTy), "__sancov_gen_");
+
+ if (TargetTriple.supportsCOMDAT() && !F.isInterposable())
+ if (auto Comdat =
+ GetOrCreateFunctionComdat(F, TargetTriple, CurModuleUniqueId))
+ Array->setComdat(Comdat);
+ Array->setSection(getSectionName(Section));
+#if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0)
+ Array->setAlignment(Align(DL->getTypeStoreSize(Ty).getFixedSize()));
+#else
+ Array->setAlignment(Align(4)); // cheating
+#endif
+ GlobalsToAppendToUsed.push_back(Array);
+ GlobalsToAppendToCompilerUsed.push_back(Array);
+ MDNode *MD = MDNode::get(F.getContext(), ValueAsMetadata::get(&F));
+ Array->addMetadata(LLVMContext::MD_associated, *MD);
+
+ return Array;
+
+}
+
+// Build the constant PC table for AllBlocks: two intptr-sized slots per
+// block — the block address (the function address itself for the entry
+// block) and a flag (1 = function entry, 0 = interior block).
+GlobalVariable *ModuleSanitizerCoverage::CreatePCArray(
+    Function &F, ArrayRef<BasicBlock *> AllBlocks) {
+
+  size_t N = AllBlocks.size();
+  assert(N);
+  SmallVector<Constant *, 32> PCs;
+  IRBuilder<> IRB(&*F.getEntryBlock().getFirstInsertionPt());
+  for (size_t i = 0; i < N; i++) {
+
+    if (&F.getEntryBlock() == AllBlocks[i]) {
+
+      PCs.push_back((Constant *)IRB.CreatePointerCast(&F, IntptrPtrTy));
+      PCs.push_back((Constant *)IRB.CreateIntToPtr(
+          ConstantInt::get(IntptrTy, 1), IntptrPtrTy));
+
+    } else {
+
+      PCs.push_back((Constant *)IRB.CreatePointerCast(
+          BlockAddress::get(AllBlocks[i]), IntptrPtrTy));
+      PCs.push_back((Constant *)IRB.CreateIntToPtr(
+          ConstantInt::get(IntptrTy, 0), IntptrPtrTy));
+
+    }
+
+  }
+
+  // N * 2 entries: (address, flags) pair per instrumented block.
+  auto *PCArray = CreateFunctionLocalArrayInSection(N * 2, F, IntptrPtrTy,
+                                                    SanCovPCsSectionName);
+  PCArray->setInitializer(
+      ConstantArray::get(ArrayType::get(IntptrPtrTy, N * 2), PCs));
+  PCArray->setConstant(true);
+
+  return PCArray;
+
+}
+
+// Allocate the per-function side arrays (guards, 8-bit counters, PC table)
+// required by the options currently enabled; one slot per block.
+void ModuleSanitizerCoverage::CreateFunctionLocalArrays(
+    Function &F, ArrayRef<BasicBlock *> AllBlocks) {
+
+  if (Options.TracePCGuard)
+    FunctionGuardArray = CreateFunctionLocalArrayInSection(
+        AllBlocks.size(), F, Int32Ty, SanCovGuardsSectionName);
+
+  if (Options.Inline8bitCounters)
+    Function8bitCounterArray = CreateFunctionLocalArrayInSection(
+        AllBlocks.size(), F, Int8Ty, SanCovCountersSectionName);
+  /*
+    if (Options.InlineBoolFlag)
+      FunctionBoolArray = CreateFunctionLocalArrayInSection(
+          AllBlocks.size(), F, Int1Ty, SanCovBoolFlagSectionName);
+  */
+  if (Options.PCTable) FunctionPCsArray = CreatePCArray(F, AllBlocks);
+
+}
+
+// Instrument every selected block of F.  Returns false (and emits nothing)
+// when there are no blocks to instrument.
+bool ModuleSanitizerCoverage::InjectCoverage(Function &             F,
+                                             ArrayRef<BasicBlock *> AllBlocks,
+                                             bool                   IsLeafFunc) {
+
+  if (AllBlocks.empty()) return false;
+  // Arrays must exist before the per-block code references them.
+  CreateFunctionLocalArrays(F, AllBlocks);
+  for (size_t i = 0, N = AllBlocks.size(); i < N; i++)
+    InjectCoverageAtBlock(F, *AllBlocks[i], i, IsLeafFunc);
+  return true;
+
+}
+
+// On every indirect call we call a run-time function
+// __sanitizer_cov_indir_call* with two parameters:
+// - callee address,
+// - global cache array that contains CacheSize pointers (zero-initialized).
+// The cache is used to speed up recording the caller-callee pairs.
+// The address of the caller is passed implicitly via caller PC.
+// CacheSize is encoded in the name of the run-time function.
+void ModuleSanitizerCoverage::InjectCoverageForIndirectCalls(
+    Function &F, ArrayRef<Instruction *> IndirCalls) {
+
+  if (IndirCalls.empty()) return;
+  assert(Options.TracePC || Options.TracePCGuard ||
+         Options.Inline8bitCounters /*|| Options.InlineBoolFlag*/);
+  for (auto I : IndirCalls) {
+
+    IRBuilder<> IRB(I);
+    CallBase & CB = cast<CallBase>(*I);
+    Value *    Callee = CB.getCalledOperand();
+    // Inline-asm "callees" have no meaningful address to record.
+    if (isa<InlineAsm>(Callee)) continue;
+    IRB.CreateCall(SanCovTracePCIndir, IRB.CreatePointerCast(Callee, IntptrTy));
+
+  }
+
+}
+
+// For every switch statement we insert a call:
+// __sanitizer_cov_trace_switch(CondValue,
+// {NumCases, ValueSizeInBits, Case0Value, Case1Value, Case2Value, ... })
+
+void ModuleSanitizerCoverage::InjectTraceForSwitch(
+    Function &, ArrayRef<Instruction *> SwitchTraceTargets) {
+
+  for (auto I : SwitchTraceTargets) {
+
+    if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
+
+      IRBuilder<>                 IRB(I);
+      SmallVector<Constant *, 16> Initializers;
+      Value *                     Cond = SI->getCondition();
+      // Conditions wider than 64 bit cannot be described to the runtime.
+      if (Cond->getType()->getScalarSizeInBits() >
+          Int64Ty->getScalarSizeInBits())
+        continue;
+      // Layout: [NumCases, ValueSizeInBits, Case0, Case1, ...].
+      Initializers.push_back(ConstantInt::get(Int64Ty, SI->getNumCases()));
+      Initializers.push_back(
+          ConstantInt::get(Int64Ty, Cond->getType()->getScalarSizeInBits()));
+      if (Cond->getType()->getScalarSizeInBits() <
+          Int64Ty->getScalarSizeInBits())
+        Cond = IRB.CreateIntCast(Cond, Int64Ty, false);
+      for (auto It : SI->cases()) {
+
+        Constant *C = It.getCaseValue();
+        if (C->getType()->getScalarSizeInBits() <
+            Int64Ty->getScalarSizeInBits())
+          C = ConstantExpr::getCast(CastInst::ZExt, It.getCaseValue(), Int64Ty);
+        Initializers.push_back(C);
+
+      }
+
+      // Case values are sorted (skipping the two header slots) so the
+      // runtime can binary-search them.
+      llvm::sort(Initializers.begin() + 2, Initializers.end(),
+                 [](const Constant *A, const Constant *B) {
+
+                   return cast<ConstantInt>(A)->getLimitedValue() <
+                          cast<ConstantInt>(B)->getLimitedValue();
+
+                 });
+
+      ArrayType *ArrayOfInt64Ty = ArrayType::get(Int64Ty, Initializers.size());
+      GlobalVariable *GV = new GlobalVariable(
+          *CurModule, ArrayOfInt64Ty, false, GlobalVariable::InternalLinkage,
+          ConstantArray::get(ArrayOfInt64Ty, Initializers),
+          "__sancov_gen_cov_switch_values");
+      IRB.CreateCall(SanCovTraceSwitchFunction,
+                     {Cond, IRB.CreatePointerCast(GV, Int64PtrTy)});
+
+    }
+
+  }
+
+}
+
+// Report non-constant integer divisors (32/64 bit only) to
+// __sanitizer_cov_trace_div{4,8} ahead of each [su]div instruction.
+void ModuleSanitizerCoverage::InjectTraceForDiv(
+    Function &, ArrayRef<BinaryOperator *> DivTraceTargets) {
+
+  for (auto BO : DivTraceTargets) {
+
+    IRBuilder<> IRB(BO);
+    Value *     A1 = BO->getOperand(1);
+    // Constant divisors carry no feedback value; skip them.
+    if (isa<ConstantInt>(A1)) continue;
+    if (!A1->getType()->isIntegerTy()) continue;
+    uint64_t TypeSize = DL->getTypeStoreSizeInBits(A1->getType());
+    int      CallbackIdx = TypeSize == 32 ? 0 : TypeSize == 64 ? 1 : -1;
+    if (CallbackIdx < 0) continue;
+    auto Ty = Type::getIntNTy(*C, TypeSize);
+    IRB.CreateCall(SanCovTraceDivFunction[CallbackIdx],
+                   {IRB.CreateIntCast(A1, Ty, true)});
+
+  }
+
+}
+
+// Report every non-constant integer GEP index to
+// __sanitizer_cov_trace_gep, so data-dependent indexing feeds coverage.
+void ModuleSanitizerCoverage::InjectTraceForGep(
+    Function &, ArrayRef<GetElementPtrInst *> GepTraceTargets) {
+
+  for (auto GEP : GepTraceTargets) {
+
+    IRBuilder<> IRB(GEP);
+    for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I)
+      if (!isa<ConstantInt>(*I) && (*I)->getType()->isIntegerTy())
+        IRB.CreateCall(SanCovTraceGepFunction,
+                       {IRB.CreateIntCast(*I, IntptrTy, true)});
+
+  }
+
+}
+
+// Instrument integer comparisons (8/16/32/64 bit) with
+// __sanitizer_cov_trace_cmp* callbacks; when exactly one operand is a
+// constant, the const_cmp variant is used with the constant first.
+void ModuleSanitizerCoverage::InjectTraceForCmp(
+    Function &, ArrayRef<Instruction *> CmpTraceTargets) {
+
+  for (auto I : CmpTraceTargets) {
+
+    if (ICmpInst *ICMP = dyn_cast<ICmpInst>(I)) {
+
+      IRBuilder<> IRB(ICMP);
+      Value *     A0 = ICMP->getOperand(0);
+      Value *     A1 = ICMP->getOperand(1);
+      if (!A0->getType()->isIntegerTy()) continue;
+      uint64_t TypeSize = DL->getTypeStoreSizeInBits(A0->getType());
+      int      CallbackIdx =
+          TypeSize == 8
+              ? 0
+              : TypeSize == 16 ? 1
+                               : TypeSize == 32 ? 2 : TypeSize == 64 ? 3 : -1;
+      if (CallbackIdx < 0) continue;
+      // __sanitizer_cov_trace_cmp((type_size << 32) | predicate, A0, A1);
+      auto CallbackFunc = SanCovTraceCmpFunction[CallbackIdx];
+      bool FirstIsConst = isa<ConstantInt>(A0);
+      bool SecondIsConst = isa<ConstantInt>(A1);
+      // If both are const, then we don't need such a comparison.
+      if (FirstIsConst && SecondIsConst) continue;
+      // If only one is const, then make it the first callback argument.
+      if (FirstIsConst || SecondIsConst) {
+
+        CallbackFunc = SanCovTraceConstCmpFunction[CallbackIdx];
+        if (SecondIsConst) std::swap(A0, A1);
+
+      }
+
+      auto Ty = Type::getIntNTy(*C, TypeSize);
+      IRB.CreateCall(CallbackFunc, {IRB.CreateIntCast(A0, Ty, true),
+                                    IRB.CreateIntCast(A1, Ty, true)});
+
+    }
+
+  }
+
+}
+
+// Instrument one basic block.  Depending on Options, emits a
+// __sanitizer_cov_trace_pc call, the AFL-style shared-bitmap update driven
+// by this function's guard array slot Idx, inline 8-bit counters, and/or
+// lowest-stack bookkeeping for non-leaf function entry blocks.
+void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
+                                                    size_t Idx,
+                                                    bool IsLeafFunc) {
+
+  BasicBlock::iterator IP = BB.getFirstInsertionPt();
+  bool                 IsEntryBB = &BB == &F.getEntryBlock();
+  DebugLoc             EntryLoc;
+  if (IsEntryBB) {
+
+    if (auto SP = F.getSubprogram())
+      EntryLoc = DebugLoc::get(SP->getScopeLine(), 0, SP);
+    // Keep static allocas and llvm.localescape calls in the entry block. Even
+    // if we aren't splitting the block, it's nice for allocas to be before
+    // calls.
+    IP = PrepareToSplitEntryBlock(BB, IP);
+
+  } else {
+
+    EntryLoc = IP->getDebugLoc();
+
+  }
+
+  IRBuilder<> IRB(&*IP);
+  IRB.SetCurrentDebugLocation(EntryLoc);
+  if (Options.TracePC) {
+
+    IRB.CreateCall(SanCovTracePC);
+    // ->setCannotMerge();  // gets the PC using GET_CALLER_PC.
+
+  }
+
+  if (Options.TracePCGuard) {
+
+    /* Get CurLoc: the runtime-assigned edge ID stored in guard slot Idx. */
+
+    Value *GuardPtr = IRB.CreateIntToPtr(
+        IRB.CreateAdd(IRB.CreatePointerCast(FunctionGuardArray, IntptrTy),
+                      ConstantInt::get(IntptrTy, Idx * 4)),
+        Int32PtrTy);
+
+    LoadInst *CurLoc = IRB.CreateLoad(GuardPtr);
+
+    /* Load SHM pointer */
+
+    LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr);
+
+    /* Load counter for CurLoc */
+
+    Value *   MapPtrIdx = IRB.CreateGEP(MapPtr, CurLoc);
+    LoadInst *Counter = IRB.CreateLoad(MapPtrIdx);
+
+    /* Update bitmap */
+
+    Value *Incr = IRB.CreateAdd(Counter, One);
+
+    // NeverZero (unless skipped): add back the carry of the 8-bit add so a
+    // wrapping counter can never become 0 and silently lose the edge.
+    if (skip_nozero == NULL) {
+
+      auto cf = IRB.CreateICmpEQ(Incr, Zero);
+      auto carry = IRB.CreateZExt(cf, Int8Ty);
+      Incr = IRB.CreateAdd(Incr, carry);
+
+    }
+
+    IRB.CreateStore(Incr, MapPtrIdx);
+
+    // done :)
+
+    // IRB.CreateCall(SanCovTracePCGuard, Offset)->setCannotMerge();
+    // IRB.CreateCall(SanCovTracePCGuard, GuardPtr)->setCannotMerge();
+    ++instr;
+
+  }
+
+  if (Options.Inline8bitCounters) {
+
+    auto CounterPtr = IRB.CreateGEP(
+        Function8bitCounterArray->getValueType(), Function8bitCounterArray,
+        {ConstantInt::get(IntptrTy, 0), ConstantInt::get(IntptrTy, Idx)});
+    auto Load = IRB.CreateLoad(Int8Ty, CounterPtr);
+    auto Inc = IRB.CreateAdd(Load, ConstantInt::get(Int8Ty, 1));
+    auto Store = IRB.CreateStore(Inc, CounterPtr);
+    SetNoSanitizeMetadata(Load);
+    SetNoSanitizeMetadata(Store);
+
+  }
+
+  /*
+  if (Options.InlineBoolFlag) {
+
+    auto FlagPtr = IRB.CreateGEP(
+        FunctionBoolArray->getValueType(), FunctionBoolArray,
+        {ConstantInt::get(IntptrTy, 0), ConstantInt::get(IntptrTy, Idx)});
+    auto Load = IRB.CreateLoad(Int1Ty, FlagPtr);
+    auto ThenTerm =
+        SplitBlockAndInsertIfThen(IRB.CreateIsNull(Load), &*IP, false);
+    IRBuilder<> ThenIRB(ThenTerm);
+    auto Store = ThenIRB.CreateStore(ConstantInt::getTrue(Int1Ty), FlagPtr);
+    SetNoSanitizeMetadata(Load);
+    SetNoSanitizeMetadata(Store);
+
+  }
+
+  */
+
+  if (Options.StackDepth && IsEntryBB && !IsLeafFunc) {
+
+    // Check stack depth. If it's the deepest so far, record it.
+    Module *  M = F.getParent();
+    Function *GetFrameAddr = Intrinsic::getDeclaration(
+        M, Intrinsic::frameaddress,
+        IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace()));
+    auto FrameAddrPtr =
+        IRB.CreateCall(GetFrameAddr, {Constant::getNullValue(Int32Ty)});
+    auto FrameAddrInt = IRB.CreatePtrToInt(FrameAddrPtr, IntptrTy);
+    auto LowestStack = IRB.CreateLoad(IntptrTy, SanCovLowestStack);
+    auto IsStackLower = IRB.CreateICmpULT(FrameAddrInt, LowestStack);
+    auto ThenTerm = SplitBlockAndInsertIfThen(IsStackLower, &*IP, false);
+    IRBuilder<> ThenIRB(ThenTerm);
+    auto Store = ThenIRB.CreateStore(FrameAddrInt, SanCovLowestStack);
+    SetNoSanitizeMetadata(LowestStack);
+    SetNoSanitizeMetadata(Store);
+
+  }
+
+}
+
+// Map a logical coverage section name to the object-format-specific one
+// (COFF grouped sections, Mach-O __DATA segment, plain ELF otherwise).
+std::string ModuleSanitizerCoverage::getSectionName(
+    const std::string &Section) const {
+
+  if (TargetTriple.isOSBinFormatCOFF()) {
+
+    if (Section == SanCovCountersSectionName) return ".SCOV$CM";
+    if (Section == SanCovBoolFlagSectionName) return ".SCOV$BM";
+    if (Section == SanCovPCsSectionName) return ".SCOVP$M";
+    return ".SCOV$GM";  // For SanCovGuardsSectionName.
+
+  }
+
+  if (TargetTriple.isOSBinFormatMachO()) return "__DATA,__" + Section;
+  return "__" + Section;
+
+}
+
+// Name of the linker-provided symbol marking the start of Section.
+std::string ModuleSanitizerCoverage::getSectionStart(
+    const std::string &Section) const {
+
+  if (TargetTriple.isOSBinFormatMachO())
+    return "\1section$start$__DATA$__" + Section;
+  return "__start___" + Section;
+
+}
+
+// Name of the linker-provided symbol marking the end of Section.
+std::string ModuleSanitizerCoverage::getSectionEnd(
+    const std::string &Section) const {
+
+  if (TargetTriple.isOSBinFormatMachO())
+    return "\1section$end$__DATA$__" + Section;
+  return "__stop___" + Section;
+
+}
+
+// Legacy pass-manager boilerplate: pass ID, registration with its analysis
+// dependencies, and the factory used by the compiler wrapper.
+char ModuleSanitizerCoverageLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ModuleSanitizerCoverageLegacyPass, "sancov",
+                      "Pass for instrumenting coverage on functions", false,
+                      false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
+INITIALIZE_PASS_END(ModuleSanitizerCoverageLegacyPass, "sancov",
+                    "Pass for instrumenting coverage on functions", false,
+                    false)
+// Factory for the legacy pass (allow/block lists select what to instrument).
+ModulePass *llvm::createModuleSanitizerCoverageLegacyPassPass(
+    const SanitizerCoverageOptions &Options,
+    const std::vector<std::string> &AllowlistFiles,
+    const std::vector<std::string> &BlocklistFiles) {
+
+  return new ModuleSanitizerCoverageLegacyPass(Options, AllowlistFiles,
+                                               BlocklistFiles);
+
+}
+
+// Hook the PCGUARD pass into clang's pipeline, both at the end of the
+// optimizer and at -O0, via the legacy PassManagerBuilder extension points.
+static void registerPCGUARDPass(const PassManagerBuilder &,
+                                legacy::PassManagerBase &PM) {
+
+  auto p = new ModuleSanitizerCoverageLegacyPass();
+  PM.add(p);
+
+}
+
+static RegisterStandardPasses RegisterCompTransPass(
+    PassManagerBuilder::EP_OptimizerLast, registerPCGUARDPass);
+
+static RegisterStandardPasses RegisterCompTransPass0(
+    PassManagerBuilder::EP_EnabledOnOptLevel0, registerPCGUARDPass);
+
diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c
new file mode 100644
index 00000000..f38af668
--- /dev/null
+++ b/instrumentation/afl-compiler-rt.o.c
@@ -0,0 +1,1268 @@
+/*
+ american fuzzy lop++ - instrumentation bootstrap
+ ------------------------------------------------
+
+ Copyright 2015, 2016 Google Inc. All rights reserved.
+ Copyright 2019-2020 AFLplusplus Project. All rights reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at:
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+
+*/
+
+#ifdef __ANDROID__
+ #include "android-ashmem.h"
+#endif
+#include "config.h"
+#include "types.h"
+#include "cmplog.h"
+#include "llvm-ngram-coverage.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <string.h>
+#include <assert.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <limits.h>
+#include <errno.h>
+
+#include <sys/mman.h>
+#ifndef __HAIKU__
+ #include <sys/shm.h>
+#endif
+#include <sys/wait.h>
+#include <sys/types.h>
+
+#if !__GNUC__
+ #include "llvm/Config/llvm-config.h"
+#endif
+
+#ifdef __linux__
+ #include "snapshot-inl.h"
+#endif
+
+/* This is a somewhat ugly hack for the experimental 'trace-pc-guard' mode.
+ Basically, we need to make sure that the forkserver is initialized after
+ the LLVM-generated runtime initialization pass, not before. */
+
+#ifndef MAP_FIXED_NOREPLACE
+ #ifdef MAP_EXCL
+ #define MAP_FIXED_NOREPLACE MAP_EXCL | MAP_FIXED
+ #else
+ #define MAP_FIXED_NOREPLACE MAP_FIXED
+ #endif
+#endif
+
+#define CTOR_PRIO 3
+
+#include <sys/mman.h>
+#include <fcntl.h>
+
+/* Globals needed by the injected instrumentation. The __afl_area_initial region
+ is used for instrumentation output before __afl_map_shm() has a chance to
+ run. It will end up as .comm, so it shouldn't be too wasteful. */
+
+#if MAP_SIZE <= 65536
+ #define MAP_INITIAL_SIZE 256000
+#else
+ #define MAP_INITIAL_SIZE MAP_SIZE
+#endif
+
+u8 __afl_area_initial[MAP_INITIAL_SIZE];
+u8 * __afl_area_ptr = __afl_area_initial;
+u8 * __afl_dictionary;
+u8 * __afl_fuzz_ptr;
+u32 __afl_fuzz_len_dummy;
+u32 *__afl_fuzz_len = &__afl_fuzz_len_dummy;
+
+u32 __afl_final_loc;
+u32 __afl_map_size = MAP_SIZE;
+u32 __afl_dictionary_len;
+u64 __afl_map_addr;
+
+#ifdef __ANDROID__
+PREV_LOC_T __afl_prev_loc[NGRAM_SIZE_MAX];
+u32 __afl_prev_ctx;
+u32 __afl_cmp_counter;
+#else
+__thread PREV_LOC_T __afl_prev_loc[NGRAM_SIZE_MAX];
+__thread u32 __afl_prev_ctx;
+__thread u32 __afl_cmp_counter;
+#endif
+
+int __afl_sharedmem_fuzzing __attribute__((weak));
+
+struct cmp_map *__afl_cmp_map;
+
+/* Running in persistent mode? */
+
+static u8 is_persistent;
+
+/* Are we in sancov mode? */
+
+static u8 _is_sancov;
+
+/* Uninspired gcc plugin instrumentation */
+
+/* Record one edge for the gcc-plugin instrumentation: hash the previous
+   location with the current one (x), bump the corresponding bitmap byte,
+   and remember x >> 1 as the new previous location. */
+
+void __afl_trace(const u32 x) {
+
+  PREV_LOC_T prev = __afl_prev_loc[0];
+  __afl_prev_loc[0] = (x >> 1);
+
+  u8 *p = &__afl_area_ptr[prev ^ x];
+
+#if 1  /* enable for neverZero feature. */
+  #if __GNUC__
+  u8 c = __builtin_add_overflow(*p, 1, p);
+  *p += c;
+  #else
+  /* Fixed: the original line read `((u8)(1 + *p == 0)` — unbalanced
+     parentheses, so this branch did not even compile.  Add 1 plus the
+     wrap-around carry so the counter can never return to 0. */
+  *p += 1 + ((u8)(1 + *p) == 0);
+  #endif
+#else
+  ++*p;
+#endif
+
+  return;
+
+}
+
+/* Error reporting to forkserver controller */
+
+void send_forkserver_error(int error) {
+
+  u32 status;
+  /* Only 16-bit error codes fit the FS_OPT_ERROR encoding. */
+  if (!error || error > 0xffff) return;
+  status = (FS_OPT_ERROR | FS_OPT_SET_ERROR(error));
+  /* Best effort: if the status pipe is gone there is nobody to tell. */
+  if (write(FORKSRV_FD + 1, (char *)&status, 4) != 4) return;
+
+}
+
+/* SHM fuzzing setup. */
+
+/* Attach the shared-memory test case buffer published by afl-fuzz via
+   SHM_FUZZ_ENV_VAR.  On success, __afl_fuzz_len points at the leading u32
+   length word and __afl_fuzz_ptr at the data right behind it.  Exits on
+   any failure, as in-memory fuzzing cannot proceed without the buffer. */
+
+static void __afl_map_shm_fuzz() {
+
+  char *id_str = getenv(SHM_FUZZ_ENV_VAR);
+
+  if (id_str) {
+
+    u8 *map = NULL;
+
+#ifdef USEMMAP
+    const char *   shm_file_path = id_str;
+    int            shm_fd = -1;
+    unsigned char *shm_base = NULL;
+
+    /* create the shared memory segment as if it was a file */
+    shm_fd = shm_open(shm_file_path, O_RDWR, 0600);
+    if (shm_fd == -1) {
+
+      fprintf(stderr, "shm_open() failed for fuzz\n");
+      send_forkserver_error(FS_ERROR_SHM_OPEN);
+      exit(1);
+
+    }
+
+    map =
+        (u8 *)mmap(0, MAX_FILE + sizeof(u32), PROT_READ, MAP_SHARED, shm_fd, 0);
+
+#else
+    u32 shm_id = atoi(id_str);
+    map = (u8 *)shmat(shm_id, NULL, 0);
+
+#endif
+
+    /* Whooooops. */
+
+    if (!map || map == (void *)-1) {
+
+      /* Fixed typo in the error message: "fuzzign" -> "fuzzing". */
+      perror("Could not access fuzzing shared memory");
+      exit(1);
+
+    }
+
+    __afl_fuzz_len = (u32 *)map;
+    __afl_fuzz_ptr = map + sizeof(u32);
+
+    if (getenv("AFL_DEBUG")) {
+
+      fprintf(stderr, "DEBUG: successfully got fuzzing shared memory\n");
+
+    }
+
+  } else {
+
+    fprintf(stderr, "Error: variable for fuzzing shared memory is not set\n");
+    exit(1);
+
+  }
+
+}
+
+/* SHM setup. */
+
+static void __afl_map_shm(void) {
+
+  // if we are not running in afl, ensure the map exists
+  if (!__afl_area_ptr) { __afl_area_ptr = __afl_area_initial; }
+
+  char *id_str = getenv(SHM_ENV_VAR);
+
+  if (__afl_final_loc) {
+
+    // instrumentation reported its real edge count: round it up to a
+    // multiple of 8 and use it as the effective map size
+    if (__afl_final_loc % 8)
+      __afl_final_loc = (((__afl_final_loc + 7) >> 3) << 3);
+    __afl_map_size = __afl_final_loc;
+
+    if (__afl_final_loc > MAP_SIZE) {
+
+      char *ptr;
+      u32   val = 0;
+      if ((ptr = getenv("AFL_MAP_SIZE")) != NULL) val = atoi(ptr);
+      if (val < __afl_final_loc) {
+
+        if (__afl_final_loc > FS_OPT_MAX_MAPSIZE) {
+
+          // too large to announce through the forkserver status word:
+          // the user must set AFL_MAP_SIZE manually
+          if (!getenv("AFL_QUIET"))
+            fprintf(stderr,
+                    "Error: AFL++ tools *require* to set AFL_MAP_SIZE to %u "
+                    "to be able to run this instrumented program!\n",
+                    __afl_final_loc);
+
+          if (id_str) {
+
+            send_forkserver_error(FS_ERROR_MAP_SIZE);
+            exit(-1);
+
+          }
+
+        } else {
+
+          if (!getenv("AFL_QUIET"))
+            fprintf(stderr,
+                    "Warning: AFL++ tools will need to set AFL_MAP_SIZE to %u "
+                    "to be able to run this instrumented program!\n",
+                    __afl_final_loc);
+
+        }
+
+      }
+
+    }
+
+  }
+
+  /* If we're running under AFL, attach to the appropriate region, replacing the
+     early-stage __afl_area_initial region that is needed to allow some really
+     hacky .init code to work correctly in projects such as OpenSSL. */
+
+  if (getenv("AFL_DEBUG"))
+    fprintf(stderr,
+            "DEBUG: id_str %s, __afl_area_ptr %p, __afl_area_initial %p, "
+            "__afl_map_addr 0x%llx, MAP_SIZE %u, __afl_final_loc %u, "
+            "max_size_forkserver %u/0x%x\n",
+            id_str == NULL ? "<null>" : id_str, __afl_area_ptr,
+            __afl_area_initial, __afl_map_addr, MAP_SIZE, __afl_final_loc,
+            FS_OPT_MAX_MAPSIZE, FS_OPT_MAX_MAPSIZE);
+
+  if (id_str) {
+
+    // release any early constructor-time allocation before attaching the
+    // real shared map
+    if (__afl_area_ptr && __afl_area_ptr != __afl_area_initial) {
+
+      if (__afl_map_addr)
+        munmap((void *)__afl_map_addr, __afl_final_loc);
+      else
+        free(__afl_area_ptr);
+      __afl_area_ptr = __afl_area_initial;
+
+    }
+
+#ifdef USEMMAP
+    const char *   shm_file_path = id_str;
+    int            shm_fd = -1;
+    unsigned char *shm_base = NULL;
+
+    /* create the shared memory segment as if it was a file */
+    shm_fd = shm_open(shm_file_path, O_RDWR, 0600);
+    if (shm_fd == -1) {
+
+      fprintf(stderr, "shm_open() failed\n");
+      send_forkserver_error(FS_ERROR_SHM_OPEN);
+      exit(1);
+
+    }
+
+    /* map the shared memory segment to the address space of the process */
+    if (__afl_map_addr) {
+
+      shm_base =
+          mmap((void *)__afl_map_addr, __afl_map_size, PROT_READ | PROT_WRITE,
+               MAP_FIXED_NOREPLACE | MAP_SHARED, shm_fd, 0);
+
+    } else {
+
+      shm_base = mmap(0, __afl_map_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+                      shm_fd, 0);
+
+    }
+
+    if (shm_base == MAP_FAILED) {
+
+      close(shm_fd);
+      shm_fd = -1;
+
+      fprintf(stderr, "mmap() failed\n");
+      if (__afl_map_addr)
+        send_forkserver_error(FS_ERROR_MAP_ADDR);
+      else
+        send_forkserver_error(FS_ERROR_MMAP);
+      exit(2);
+
+    }
+
+    __afl_area_ptr = shm_base;
+#else
+    u32 shm_id = atoi(id_str);
+
+    __afl_area_ptr = shmat(shm_id, (void *)__afl_map_addr, 0);
+
+#endif
+
+    /* Whooooops. */
+
+    if (__afl_area_ptr == (void *)-1) {
+
+      if (__afl_map_addr)
+        send_forkserver_error(FS_ERROR_MAP_ADDR);
+      else
+        send_forkserver_error(FS_ERROR_SHMAT);
+      _exit(1);
+
+    }
+
+    /* Write something into the bitmap so that even with low AFL_INST_RATIO,
+       our parent doesn't give up on us. */
+
+    __afl_area_ptr[0] = 1;
+
+  } else if ((!__afl_area_ptr || __afl_area_ptr == __afl_area_initial) &&
+
+             __afl_map_addr) {
+
+    // fixed-address (LTO) build running without afl-fuzz: back the map with
+    // anonymous memory at the expected address
+    __afl_area_ptr =
+        mmap((void *)__afl_map_addr, __afl_map_size, PROT_READ | PROT_WRITE,
+             MAP_FIXED_NOREPLACE | MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+
+    if (__afl_area_ptr == MAP_FAILED) {
+
+      fprintf(stderr, "can not acquire mmap for address %p\n",
+              (void *)__afl_map_addr);
+      exit(1);
+
+    }
+
+  } else if (_is_sancov && __afl_area_ptr != __afl_area_initial) {
+
+    // sancov standalone mode: size a private buffer to the real edge count
+    free(__afl_area_ptr);
+    __afl_area_ptr = NULL;
+    if (__afl_final_loc > MAP_INITIAL_SIZE)
+      __afl_area_ptr = malloc(__afl_final_loc);
+    if (!__afl_area_ptr) __afl_area_ptr = __afl_area_initial;
+
+  }
+
+  // optionally attach the cmplog comparison map as well
+  id_str = getenv(CMPLOG_SHM_ENV_VAR);
+
+  if (getenv("AFL_DEBUG")) {
+
+    fprintf(stderr, "DEBUG: cmplog id_str %s\n",
+            id_str == NULL ? "<null>" : id_str);
+
+  }
+
+  if (id_str) {
+
+#ifdef USEMMAP
+    const char *   shm_file_path = id_str;
+    int            shm_fd = -1;
+    unsigned char *shm_base = NULL;
+
+    /* create the shared memory segment as if it was a file */
+    shm_fd = shm_open(shm_file_path, O_RDWR, 0600);
+    if (shm_fd == -1) {
+
+      fprintf(stderr, "shm_open() failed\n");
+      exit(1);
+
+    }
+
+    /* map the shared memory segment to the address space of the process */
+    shm_base = mmap(0, sizeof(struct cmp_map), PROT_READ | PROT_WRITE,
+                    MAP_SHARED, shm_fd, 0);
+    if (shm_base == MAP_FAILED) {
+
+      close(shm_fd);
+      shm_fd = -1;
+
+      fprintf(stderr, "mmap() failed\n");
+      exit(2);
+
+    }
+
+    __afl_cmp_map = shm_base;
+#else
+    u32 shm_id = atoi(id_str);
+
+    __afl_cmp_map = shmat(shm_id, NULL, 0);
+#endif
+
+    if (__afl_cmp_map == (void *)-1) _exit(1);
+
+  }
+
+}
+
+#ifdef __linux__
+/* Forkserver variant for the Linux snapshot LKM: children restore state via
+   the snapshot module and signal run completion with SIGSTOP, so they can be
+   resumed with SIGCONT instead of re-forking.  The handshake with afl-fuzz
+   mirrors __afl_start_forkserver(), plus the FS_OPT_SNAPSHOT flag. */
+static void __afl_start_snapshots(void) {
+
+  static u8 tmp[4] = {0, 0, 0, 0};
+  s32       child_pid;
+  u32       status = 0;
+  u32       already_read_first = 0;
+  u32       was_killed;
+
+  u8 child_stopped = 0;
+
+  void (*old_sigchld_handler)(int) = 0;  // = signal(SIGCHLD, SIG_DFL);
+
+  /* Phone home and tell the parent that we're OK. If parent isn't there,
+     assume we're not running in forkserver mode and just execute program. */
+
+  status |= (FS_OPT_ENABLED | FS_OPT_SNAPSHOT);
+  if (__afl_sharedmem_fuzzing != 0) status |= FS_OPT_SHDMEM_FUZZ;
+  if (__afl_map_size <= FS_OPT_MAX_MAPSIZE)
+    status |= (FS_OPT_SET_MAPSIZE(__afl_map_size) | FS_OPT_MAPSIZE);
+  if (__afl_dictionary_len && __afl_dictionary) status |= FS_OPT_AUTODICT;
+  memcpy(tmp, &status, 4);
+
+  if (write(FORKSRV_FD + 1, tmp, 4) != 4) return;
+
+  /* Extended handshake: learn which options afl-fuzz acknowledged. */
+
+  if (__afl_sharedmem_fuzzing || (__afl_dictionary_len && __afl_dictionary)) {
+
+    if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1);
+
+    if (getenv("AFL_DEBUG")) {
+
+      fprintf(stderr, "target forkserver recv: %08x\n", was_killed);
+
+    }
+
+    if ((was_killed & (FS_OPT_ENABLED | FS_OPT_SHDMEM_FUZZ)) ==
+        (FS_OPT_ENABLED | FS_OPT_SHDMEM_FUZZ)) {
+
+      __afl_map_shm_fuzz();
+
+    }
+
+    if ((was_killed & (FS_OPT_ENABLED | FS_OPT_AUTODICT)) ==
+        (FS_OPT_ENABLED | FS_OPT_AUTODICT) &&
+        __afl_dictionary_len && __afl_dictionary) {
+
+      // great lets pass the dictionary through the forkserver FD
+      u32 len = __afl_dictionary_len, offset = 0;
+      s32 ret;
+
+      if (write(FORKSRV_FD + 1, &len, 4) != 4) {
+
+        write(2, "Error: could not send dictionary len\n",
+              strlen("Error: could not send dictionary len\n"));
+        _exit(1);
+
+      }
+
+      while (len != 0) {
+
+        ret = write(FORKSRV_FD + 1, __afl_dictionary + offset, len);
+
+        if (ret < 1) {
+
+          write(2, "Error: could not send dictionary\n",
+                strlen("Error: could not send dictionary\n"));
+          _exit(1);
+
+        }
+
+        len -= ret;
+        offset += ret;
+
+      }
+
+    } else {
+
+      // uh this forkserver does not understand extended option passing
+      // or does not want the dictionary
+      if (!__afl_fuzz_ptr) already_read_first = 1;
+
+    }
+
+  }
+
+  while (1) {
+
+    int status;
+
+    if (already_read_first) {
+
+      already_read_first = 0;
+
+    } else {
+
+      /* Wait for parent by reading from the pipe. Abort if read fails. */
+      if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1);
+
+    }
+
+  #ifdef _AFL_DOCUMENT_MUTATIONS
+    /* Debug aid: dump every incoming shared-memory test case to disk. */
+    if (__afl_fuzz_ptr) {
+
+      static uint32_t counter = 0;
+      char            fn[32];
+      sprintf(fn, "%09u:forkserver", counter);
+      s32 fd_doc = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600);
+      if (fd_doc >= 0) {
+
+        if (write(fd_doc, __afl_fuzz_ptr, *__afl_fuzz_len) != *__afl_fuzz_len) {
+
+          fprintf(stderr, "write of mutation file failed: %s\n", fn);
+          unlink(fn);
+
+        }
+
+        close(fd_doc);
+
+      }
+
+      counter++;
+
+    }
+
+  #endif
+
+    /* If we stopped the child in persistent mode, but there was a race
+       condition and afl-fuzz already issued SIGKILL, write off the old
+       process. */
+
+    if (child_stopped && was_killed) {
+
+      child_stopped = 0;
+      if (waitpid(child_pid, &status, 0) < 0) _exit(1);
+
+    }
+
+    if (!child_stopped) {
+
+      /* Once woken up, create a clone of our process. */
+
+      child_pid = fork();
+      if (child_pid < 0) _exit(1);
+
+      /* In child process: close fds, resume execution. */
+
+      if (!child_pid) {
+
+        //(void)nice(-20);  // does not seem to improve
+
+        signal(SIGCHLD, old_sigchld_handler);
+
+        close(FORKSRV_FD);
+        close(FORKSRV_FD + 1);
+
+        // take the snapshot; on restore this call returns non-zero and we
+        // skip the SIGSTOP
+        if (!afl_snapshot_take(AFL_SNAPSHOT_MMAP | AFL_SNAPSHOT_FDS |
+                               AFL_SNAPSHOT_REGS | AFL_SNAPSHOT_EXIT)) {
+
+          raise(SIGSTOP);
+
+        }
+
+        __afl_area_ptr[0] = 1;
+        memset(__afl_prev_loc, 0, NGRAM_SIZE_MAX * sizeof(PREV_LOC_T));
+
+        return;
+
+      }
+
+    } else {
+
+      /* Special handling for persistent mode: if the child is alive but
+         currently stopped, simply restart it with SIGCONT. */
+
+      kill(child_pid, SIGCONT);
+      child_stopped = 0;
+
+    }
+
+    /* In parent process: write PID to pipe, then wait for child. */
+
+    if (write(FORKSRV_FD + 1, &child_pid, 4) != 4) _exit(1);
+
+    if (waitpid(child_pid, &status, WUNTRACED) < 0) _exit(1);
+
+    /* In persistent mode, the child stops itself with SIGSTOP to indicate
+       a successful run. In this case, we want to wake it up without forking
+       again. */
+
+    if (WIFSTOPPED(status)) child_stopped = 1;
+
+    /* Relay wait status to pipe, then loop back. */
+
+    if (write(FORKSRV_FD + 1, &status, 4) != 4) _exit(1);
+
+  }
+
+}
+
+#endif
+
+/* Fork server logic. */
+
+static void __afl_start_forkserver(void) {
+
+#ifdef __linux__
+  /* Prefer the snapshot LKM variant when the module is available and not
+     disabled (cmplog children must really fork, so it is skipped there). */
+  if (/*!is_persistent &&*/ !__afl_cmp_map && !getenv("AFL_NO_SNAPSHOT") &&
+      afl_snapshot_init() >= 0) {
+
+    __afl_start_snapshots();
+    return;
+
+  }
+
+#endif
+
+  u8  tmp[4] = {0, 0, 0, 0};
+  s32 child_pid;
+  u32 status = 0;
+  u32 already_read_first = 0;
+  u32 was_killed;
+
+  u8 child_stopped = 0;
+
+  void (*old_sigchld_handler)(int) = 0;  // = signal(SIGCHLD, SIG_DFL);
+
+  /* Advertise our capabilities (map size, autodict, shmem fuzzing) in the
+     initial status word. */
+
+  if (__afl_map_size <= FS_OPT_MAX_MAPSIZE)
+    status |= (FS_OPT_SET_MAPSIZE(__afl_map_size) | FS_OPT_MAPSIZE);
+  if (__afl_dictionary_len && __afl_dictionary) status |= FS_OPT_AUTODICT;
+  if (__afl_sharedmem_fuzzing != 0) status |= FS_OPT_SHDMEM_FUZZ;
+  if (status) status |= (FS_OPT_ENABLED);
+  memcpy(tmp, &status, 4);
+
+  /* Phone home and tell the parent that we're OK. If parent isn't there,
+     assume we're not running in forkserver mode and just execute program. */
+
+  if (write(FORKSRV_FD + 1, tmp, 4) != 4) return;
+
+  if (__afl_sharedmem_fuzzing || (__afl_dictionary_len && __afl_dictionary)) {
+
+    if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1);
+
+    if (getenv("AFL_DEBUG")) {
+
+      fprintf(stderr, "target forkserver recv: %08x\n", was_killed);
+
+    }
+
+    if ((was_killed & (FS_OPT_ENABLED | FS_OPT_SHDMEM_FUZZ)) ==
+        (FS_OPT_ENABLED | FS_OPT_SHDMEM_FUZZ)) {
+
+      __afl_map_shm_fuzz();
+
+    }
+
+    if ((was_killed & (FS_OPT_ENABLED | FS_OPT_AUTODICT)) ==
+        (FS_OPT_ENABLED | FS_OPT_AUTODICT) &&
+        __afl_dictionary_len && __afl_dictionary) {
+
+      // great lets pass the dictionary through the forkserver FD
+      u32 len = __afl_dictionary_len, offset = 0;
+      s32 ret;
+
+      if (write(FORKSRV_FD + 1, &len, 4) != 4) {
+
+        write(2, "Error: could not send dictionary len\n",
+              strlen("Error: could not send dictionary len\n"));
+        _exit(1);
+
+      }
+
+      while (len != 0) {
+
+        ret = write(FORKSRV_FD + 1, __afl_dictionary + offset, len);
+
+        if (ret < 1) {
+
+          write(2, "Error: could not send dictionary\n",
+                strlen("Error: could not send dictionary\n"));
+          _exit(1);
+
+        }
+
+        len -= ret;
+        offset += ret;
+
+      }
+
+    } else {
+
+      // uh this forkserver does not understand extended option passing
+      // or does not want the dictionary
+      if (!__afl_fuzz_ptr) already_read_first = 1;
+
+    }
+
+  }
+
+  while (1) {
+
+    int status;
+
+    /* Wait for parent by reading from the pipe. Abort if read fails. */
+
+    if (already_read_first) {
+
+      already_read_first = 0;
+
+    } else {
+
+      if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1);
+
+    }
+
+#ifdef _AFL_DOCUMENT_MUTATIONS
+    /* Debug aid: dump every incoming shared-memory test case to disk. */
+    if (__afl_fuzz_ptr) {
+
+      static uint32_t counter = 0;
+      char            fn[32];
+      sprintf(fn, "%09u:forkserver", counter);
+      s32 fd_doc = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600);
+      if (fd_doc >= 0) {
+
+        if (write(fd_doc, __afl_fuzz_ptr, *__afl_fuzz_len) != *__afl_fuzz_len) {
+
+          fprintf(stderr, "write of mutation file failed: %s\n", fn);
+          unlink(fn);
+
+        }
+
+        close(fd_doc);
+
+      }
+
+      counter++;
+
+    }
+
+#endif
+
+    /* If we stopped the child in persistent mode, but there was a race
+       condition and afl-fuzz already issued SIGKILL, write off the old
+       process. */
+
+    if (child_stopped && was_killed) {
+
+      child_stopped = 0;
+      if (waitpid(child_pid, &status, 0) < 0) _exit(1);
+
+    }
+
+    if (!child_stopped) {
+
+      /* Once woken up, create a clone of our process. */
+
+      child_pid = fork();
+      if (child_pid < 0) _exit(1);
+
+      /* In child process: close fds, resume execution. */
+
+      if (!child_pid) {
+
+        //(void)nice(-20);
+
+        signal(SIGCHLD, old_sigchld_handler);
+
+        close(FORKSRV_FD);
+        close(FORKSRV_FD + 1);
+        return;
+
+      }
+
+    } else {
+
+      /* Special handling for persistent mode: if the child is alive but
+         currently stopped, simply restart it with SIGCONT. */
+
+      kill(child_pid, SIGCONT);
+      child_stopped = 0;
+
+    }
+
+    /* In parent process: write PID to pipe, then wait for child. */
+
+    if (write(FORKSRV_FD + 1, &child_pid, 4) != 4) _exit(1);
+
+    if (waitpid(child_pid, &status, is_persistent ? WUNTRACED : 0) < 0)
+      _exit(1);
+
+    /* In persistent mode, the child stops itself with SIGSTOP to indicate
+       a successful run. In this case, we want to wake it up without forking
+       again. */
+
+    if (WIFSTOPPED(status)) child_stopped = 1;
+
+    /* Relay wait status to pipe, then loop back. */
+
+    if (write(FORKSRV_FD + 1, &status, 4) != 4) _exit(1);
+
+  }
+
+}
+
+/* A simplified persistent mode handler, used as explained in
+ * README.llvm.md. */
+
+/* Returns 1 while another persistent-mode iteration should run, 0 when the
+   loop is done.  Between iterations the process stops itself (SIGSTOP) so
+   the forkserver can resume it with SIGCONT instead of re-forking. */
+
+int __afl_persistent_loop(unsigned int max_cnt) {
+
+  static u8  first_pass = 1;
+  static u32 cycle_cnt;
+
+  if (first_pass) {
+
+    /* Make sure that every iteration of __AFL_LOOP() starts with a clean slate.
+       On subsequent calls, the parent will take care of that, but on the first
+       iteration, it's our job to erase any trace of whatever happened
+       before the loop. */
+
+    if (is_persistent) {
+
+      memset(__afl_area_ptr, 0, __afl_map_size);
+      __afl_area_ptr[0] = 1;
+      memset(__afl_prev_loc, 0, NGRAM_SIZE_MAX * sizeof(PREV_LOC_T));
+
+    }
+
+    cycle_cnt = max_cnt;
+    first_pass = 0;
+    return 1;
+
+  }
+
+  if (is_persistent) {
+
+    if (--cycle_cnt) {
+
+      /* Signal a finished run, then (after SIGCONT) reset the edge state
+         for the next iteration. */
+      raise(SIGSTOP);
+
+      __afl_area_ptr[0] = 1;
+      memset(__afl_prev_loc, 0, NGRAM_SIZE_MAX * sizeof(PREV_LOC_T));
+
+      return 1;
+
+    } else {
+
+      /* When exiting __AFL_LOOP(), make sure that the subsequent code that
+         follows the loop is not traced. We do that by pivoting back to the
+         dummy output region. */
+
+      __afl_area_ptr = __afl_area_initial;
+
+    }
+
+  }
+
+  return 0;
+
+}
+
+/* This one can be called from user code when deferred forkserver mode
+ is enabled. */
+
+void __afl_manual_init(void) {
+
+  static u8 init_done;
+
+  /* AFL_DISABLE_LLVM_INSTRUMENTATION: pretend init already happened and run
+     without a forkserver, keeping the dummy map as output sink. */
+  if (getenv("AFL_DISABLE_LLVM_INSTRUMENTATION")) {
+
+    init_done = 1;
+    is_persistent = 0;
+    __afl_sharedmem_fuzzing = 0;
+    if (__afl_area_ptr == NULL) __afl_area_ptr = __afl_area_initial;
+
+    if (getenv("AFL_DEBUG"))
+      fprintf(stderr,
+              "DEBUG: disabled instrumentation because of "
+              "AFL_DISABLE_LLVM_INSTRUMENTATION\n");
+
+  }
+
+  if (!init_done) {
+
+    __afl_start_forkserver();
+    init_done = 1;
+
+  }
+
+}
+
+/* Initialization of the forkserver - latest possible */
+
+__attribute__((constructor())) void __afl_auto_init(void) {
+
+ if (getenv("AFL_DISABLE_LLVM_INSTRUMENTATION")) return;
+
+ if (getenv(DEFER_ENV_VAR)) return;
+
+ __afl_manual_init();
+
+}
+
+/* Initialization of the shmem - earliest possible because of LTO fixed mem. */
+
+__attribute__((constructor(CTOR_PRIO))) void __afl_auto_early(void) {
+
+ is_persistent = !!getenv(PERSIST_ENV_VAR);
+
+ if (getenv("AFL_DISABLE_LLVM_INSTRUMENTATION")) return;
+
+ __afl_map_shm();
+
+}
+
+/* preset __afl_area_ptr #2 */
+
+__attribute__((constructor(1))) void __afl_auto_second(void) {
+
+ if (getenv("AFL_DISABLE_LLVM_INSTRUMENTATION")) return;
+ u8 *ptr;
+
+ if (__afl_final_loc) {
+
+ if (__afl_area_ptr && __afl_area_ptr != __afl_area_initial)
+ free(__afl_area_ptr);
+
+ if (__afl_map_addr)
+ ptr = (u8 *)mmap((void *)__afl_map_addr, __afl_final_loc,
+ PROT_READ | PROT_WRITE,
+ MAP_FIXED_NOREPLACE | MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ else
+ ptr = (u8 *)malloc(__afl_final_loc);
+
+ if (ptr && (ssize_t)ptr != -1) __afl_area_ptr = ptr;
+
+ }
+
+}
+
+/* preset __afl_area_ptr #1 - at constructor level 0 global variables have
+ not been set */
+
+__attribute__((constructor(0))) void __afl_auto_first(void) {
+
+ if (getenv("AFL_DISABLE_LLVM_INSTRUMENTATION")) return;
+ u8 *ptr;
+
+ ptr = (u8 *)malloc(1024000);
+
+ if (ptr && (ssize_t)ptr != -1) __afl_area_ptr = ptr;
+
+}
+
+/* The following stuff deals with supporting -fsanitize-coverage=trace-pc-guard.
+ It remains non-operational in the traditional, plugin-backed LLVM mode.
+ For more info about 'trace-pc-guard', see README.llvm.md.
+
+ The first function (__sanitizer_cov_trace_pc_guard) is called back on every
+ edge (as opposed to every basic block). */
+
+void __sanitizer_cov_trace_pc_guard(uint32_t *guard) {
+
+ // For stability analysis, if you want to know to which function unstable
+ // edge IDs belong - uncomment, recompile+install llvm_mode, recompile
+ // the target. libunwind and libbacktrace are better solutions.
+ // Set AFL_DEBUG_CHILD_OUTPUT=1 and run afl-fuzz with 2>file to capture
+ // the backtrace output
+ /*
+ uint32_t unstable[] = { ... unstable edge IDs };
+ uint32_t idx;
+ char bt[1024];
+  for (idx = 0; idx < sizeof(unstable)/sizeof(uint32_t); idx++) {
+
+ if (unstable[idx] == __afl_area_ptr[*guard]) {
+
+ int bt_size = backtrace(bt, 256);
+ if (bt_size > 0) {
+
+ char **bt_syms = backtrace_symbols(bt, bt_size);
+ if (bt_syms) {
+
+ fprintf(stderr, "DEBUG: edge=%u caller=%s\n", unstable[idx],
+ bt_syms[0]);
+ free(bt_syms);
+
+ }
+
+ }
+
+ }
+
+ }
+
+ */
+
+#if (LLVM_VERSION_MAJOR < 9)
+
+ __afl_area_ptr[*guard]++;
+
+#else
+
+ __afl_area_ptr[*guard] =
+ __afl_area_ptr[*guard] + 1 + (__afl_area_ptr[*guard] == 255 ? 1 : 0);
+
+#endif
+
+}
+
+/* Init callback. Populates instrumentation IDs. Note that we're using
+ ID of 0 as a special value to indicate non-instrumented bits. That may
+ still touch the bitmap, but in a fairly harmless way. */
+
+void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) {
+
+ u32 inst_ratio = 100;
+ char *x;
+
+ _is_sancov = 1;
+
+ if (getenv("AFL_DEBUG")) {
+
+ fprintf(stderr, "Running __sanitizer_cov_trace_pc_guard_init: %p-%p\n",
+ start, stop);
+
+ }
+
+ if (start == stop || *start) return;
+
+ x = getenv("AFL_INST_RATIO");
+ if (x) inst_ratio = (u32)atoi(x);
+
+ if (!inst_ratio || inst_ratio > 100) {
+
+ fprintf(stderr, "[-] ERROR: Invalid AFL_INST_RATIO (must be 1-100).\n");
+ abort();
+
+ }
+
+ /* Make sure that the first element in the range is always set - we use that
+ to avoid duplicate calls (which can happen as an artifact of the underlying
+ implementation in LLVM). */
+
+ *(start++) = R(MAP_SIZE - 1) + 1;
+
+ while (start < stop) {
+
+ if (R(100) < inst_ratio)
+ *start = ++__afl_final_loc;
+ else
+ *start = 0;
+
+ start++;
+
+ }
+
+}
+
+///// CmpLog instrumentation
+
+void __cmplog_ins_hook1(uint8_t arg1, uint8_t arg2) {
+
+ if (unlikely(!__afl_cmp_map)) return;
+
+ uintptr_t k = (uintptr_t)__builtin_return_address(0);
+ k = (k >> 4) ^ (k << 8);
+ k &= CMP_MAP_W - 1;
+
+ __afl_cmp_map->headers[k].type = CMP_TYPE_INS;
+
+ u32 hits = __afl_cmp_map->headers[k].hits;
+ __afl_cmp_map->headers[k].hits = hits + 1;
+ // if (!__afl_cmp_map->headers[k].cnt)
+ // __afl_cmp_map->headers[k].cnt = __afl_cmp_counter++;
+
+ __afl_cmp_map->headers[k].shape = 0;
+
+ hits &= CMP_MAP_H - 1;
+ __afl_cmp_map->log[k][hits].v0 = arg1;
+ __afl_cmp_map->log[k][hits].v1 = arg2;
+
+}
+
+void __cmplog_ins_hook2(uint16_t arg1, uint16_t arg2) {
+
+ if (unlikely(!__afl_cmp_map)) return;
+
+ uintptr_t k = (uintptr_t)__builtin_return_address(0);
+ k = (k >> 4) ^ (k << 8);
+ k &= CMP_MAP_W - 1;
+
+ __afl_cmp_map->headers[k].type = CMP_TYPE_INS;
+
+ u32 hits = __afl_cmp_map->headers[k].hits;
+ __afl_cmp_map->headers[k].hits = hits + 1;
+
+ __afl_cmp_map->headers[k].shape = 1;
+
+ hits &= CMP_MAP_H - 1;
+ __afl_cmp_map->log[k][hits].v0 = arg1;
+ __afl_cmp_map->log[k][hits].v1 = arg2;
+
+}
+
+void __cmplog_ins_hook4(uint32_t arg1, uint32_t arg2) {
+
+ if (unlikely(!__afl_cmp_map)) return;
+
+ uintptr_t k = (uintptr_t)__builtin_return_address(0);
+ k = (k >> 4) ^ (k << 8);
+ k &= CMP_MAP_W - 1;
+
+ __afl_cmp_map->headers[k].type = CMP_TYPE_INS;
+
+ u32 hits = __afl_cmp_map->headers[k].hits;
+ __afl_cmp_map->headers[k].hits = hits + 1;
+
+ __afl_cmp_map->headers[k].shape = 3;
+
+ hits &= CMP_MAP_H - 1;
+ __afl_cmp_map->log[k][hits].v0 = arg1;
+ __afl_cmp_map->log[k][hits].v1 = arg2;
+
+}
+
+void __cmplog_ins_hook8(uint64_t arg1, uint64_t arg2) {
+
+ if (unlikely(!__afl_cmp_map)) return;
+
+ uintptr_t k = (uintptr_t)__builtin_return_address(0);
+ k = (k >> 4) ^ (k << 8);
+ k &= CMP_MAP_W - 1;
+
+ __afl_cmp_map->headers[k].type = CMP_TYPE_INS;
+
+ u32 hits = __afl_cmp_map->headers[k].hits;
+ __afl_cmp_map->headers[k].hits = hits + 1;
+
+ __afl_cmp_map->headers[k].shape = 7;
+
+ hits &= CMP_MAP_H - 1;
+ __afl_cmp_map->log[k][hits].v0 = arg1;
+ __afl_cmp_map->log[k][hits].v1 = arg2;
+
+}
+
+#if defined(__APPLE__)
+ #pragma weak __sanitizer_cov_trace_const_cmp1 = __cmplog_ins_hook1
+ #pragma weak __sanitizer_cov_trace_const_cmp2 = __cmplog_ins_hook2
+ #pragma weak __sanitizer_cov_trace_const_cmp4 = __cmplog_ins_hook4
+ #pragma weak __sanitizer_cov_trace_const_cmp8 = __cmplog_ins_hook8
+
+ #pragma weak __sanitizer_cov_trace_cmp1 = __cmplog_ins_hook1
+ #pragma weak __sanitizer_cov_trace_cmp2 = __cmplog_ins_hook2
+ #pragma weak __sanitizer_cov_trace_cmp4 = __cmplog_ins_hook4
+ #pragma weak __sanitizer_cov_trace_cmp8 = __cmplog_ins_hook8
+#else
+void __sanitizer_cov_trace_const_cmp1(uint8_t arg1, uint8_t arg2)
+ __attribute__((alias("__cmplog_ins_hook1")));
+void __sanitizer_cov_trace_const_cmp2(uint16_t arg1, uint16_t arg2)
+ __attribute__((alias("__cmplog_ins_hook2")));
+void __sanitizer_cov_trace_const_cmp4(uint32_t arg1, uint32_t arg2)
+ __attribute__((alias("__cmplog_ins_hook4")));
+void __sanitizer_cov_trace_const_cmp8(uint64_t arg1, uint64_t arg2)
+ __attribute__((alias("__cmplog_ins_hook8")));
+
+void __sanitizer_cov_trace_cmp1(uint8_t arg1, uint8_t arg2)
+ __attribute__((alias("__cmplog_ins_hook1")));
+void __sanitizer_cov_trace_cmp2(uint16_t arg1, uint16_t arg2)
+ __attribute__((alias("__cmplog_ins_hook2")));
+void __sanitizer_cov_trace_cmp4(uint32_t arg1, uint32_t arg2)
+ __attribute__((alias("__cmplog_ins_hook4")));
+void __sanitizer_cov_trace_cmp8(uint64_t arg1, uint64_t arg2)
+ __attribute__((alias("__cmplog_ins_hook8")));
+#endif /* defined(__APPLE__) */
+
+void __sanitizer_cov_trace_switch(uint64_t val, uint64_t *cases) {
+
+ if (unlikely(!__afl_cmp_map)) return;
+
+ for (uint64_t i = 0; i < cases[0]; i++) {
+
+ uintptr_t k = (uintptr_t)__builtin_return_address(0) + i;
+ k = (k >> 4) ^ (k << 8);
+ k &= CMP_MAP_W - 1;
+
+ __afl_cmp_map->headers[k].type = CMP_TYPE_INS;
+
+ u32 hits = __afl_cmp_map->headers[k].hits;
+ __afl_cmp_map->headers[k].hits = hits + 1;
+
+ __afl_cmp_map->headers[k].shape = 7;
+
+ hits &= CMP_MAP_H - 1;
+ __afl_cmp_map->log[k][hits].v0 = val;
+ __afl_cmp_map->log[k][hits].v1 = cases[i + 2];
+
+ }
+
+}
+
+// POSIX shenanigan to see if an area is mapped.
+// If it is mapped as X-only, we have a problem, so maybe we should add a check
+// to avoid calling it on .text addresses
+static int area_is_mapped(void *ptr, size_t len) {
+
+ char *p = ptr;
+ char *page = (char *)((uintptr_t)p & ~(sysconf(_SC_PAGE_SIZE) - 1));
+
+ int r = msync(page, (p - page) + len, MS_ASYNC);
+ if (r < 0) return errno != ENOMEM;
+ return 1;
+
+}
+
+void __cmplog_rtn_hook(u8 *ptr1, u8 *ptr2) {
+
+ if (unlikely(!__afl_cmp_map)) return;
+
+ if (!area_is_mapped(ptr1, 32) || !area_is_mapped(ptr2, 32)) return;
+
+ uintptr_t k = (uintptr_t)__builtin_return_address(0);
+ k = (k >> 4) ^ (k << 8);
+ k &= CMP_MAP_W - 1;
+
+ __afl_cmp_map->headers[k].type = CMP_TYPE_RTN;
+
+ u32 hits = __afl_cmp_map->headers[k].hits;
+ __afl_cmp_map->headers[k].hits = hits + 1;
+
+ __afl_cmp_map->headers[k].shape = 31;
+
+ hits &= CMP_MAP_RTN_H - 1;
+ __builtin_memcpy(((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v0,
+ ptr1, 32);
+ __builtin_memcpy(((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v1,
+ ptr2, 32);
+
+}
+
diff --git a/instrumentation/afl-gcc-pass.so.cc b/instrumentation/afl-gcc-pass.so.cc
new file mode 100644
index 00000000..f94bb57f
--- /dev/null
+++ b/instrumentation/afl-gcc-pass.so.cc
@@ -0,0 +1,968 @@
+/* GCC plugin for instrumentation of code for american fuzzy lop.
+
+ Copyright 2014-2019 Free Software Foundation, Inc
+ Copyright 2015, 2016 Google Inc. All rights reserved.
+ Copyright 2019-2020 AdaCore
+
+ Written by Alexandre Oliva <oliva@adacore.com>, based on the AFL
+ LLVM pass by Laszlo Szekeres <lszekeres@google.com> and Michal
+ Zalewski <lcamtuf@google.com>, and copying a little boilerplate
+ from GCC's libcc1 plugin and GCC proper. Aside from the
+ boilerplate, namely includes and the pass data structure, and pass
+ initialization code and output messages borrowed and adapted from
+ the LLVM pass into plugin_init and plugin_finalize, the
+ implementation of the GCC pass proper is written from scratch,
+ aiming at similar behavior and performance to that of the LLVM
+ pass, and also at compatibility with the out-of-line
+ instrumentation and run times of AFL++, as well as of an earlier
+ GCC plugin implementation by Austin Seipp <aseipp@pobox.com>. The
+ implementation of Allow/Deny Lists is adapted from that in the LLVM
+ plugin.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+ */
+
+/* This file implements a GCC plugin that introduces an
+ instrumentation pass for AFL. What follows is the specification
+ used to rewrite it, extracted from the functional llvm_mode pass
+ and from an implementation of the gcc_plugin started by Austin
+ Seipp <aseipp@pobox.com>.
+
+ Declare itself as GPL-compatible.
+
+ Define a 'plugin_init' function.
+
+ Check version against the global gcc_version.
+
+ Register a PLUGIN_INFO object with .version and .help.
+
+ Initialize the random number generator seed with GCC's
+ random seed.
+
+ Set quiet mode depending on whether stderr is a terminal and
+ AFL_QUIET is set.
+
+ Output some identification message if not in quiet mode.
+
+ Parse AFL_INST_RATIO, if set, as a number between 0 and 100. Error
+ out if it's not in range; set up an instrumentation ratio global
+ otherwise.
+
+ Introduce a single instrumentation pass after SSA.
+
+ The new pass is to be a GIMPLE_PASS. Given the sort of
+ instrumentation it's supposed to do, its todo_flags_finish will
+ certainly need TODO_update_ssa, and TODO_cleanup_cfg.
+ TODO_verify_il is probably desirable, at least during debugging.
+ TODO_rebuild_cgraph_edges is required only in the out-of-line
+ instrumentation mode.
+
+ The instrumentation pass amounts to iterating over all basic blocks
+ and optionally inserting one of the instrumentation sequences below
+ after its labels, to indicate execution entered the block.
+
+ A block should be skipped if R(100) (from ../types.h) is >= the
+ global instrumentation ratio.
+
+ A block may be skipped for other reasons, such as if all of its
+ predecessors have a single successor.
+
+ For an instrumented block, a R(MAP_SIZE) say <N> should be
+ generated to be used as its location number. Let <C> be a compiler
+ constant built out of it.
+
+ Count instrumented blocks and print a message at the end of the
+ compilation, if not in quiet mode.
+
+ Instrumentation in "dumb" or "out-of-line" mode requires calling a
+ function, passing it the location number. The function to be
+ called is __afl_trace, implemented in afl-gcc-rt.o.c. Its
+ declaration <T> needs only be created once.
+
+ Build the call statement <T> (<C>), then add it to the seq to be
+ inserted.
+
+ Instrumentation in "fast" or "inline" mode performs the computation
+ of __afl_trace as part of the function.
+
+ It needs to read and write __afl_prev_loc, a TLS u32 variable. Its
+ declaration <P> needs only be created once.
+
+ It needs to read and dereference __afl_area_ptr, a pointer to (an
+ array of) char. Its declaration <M> needs only be created once.
+
+ The instrumentation sequence should then be filled with the
+ following statements:
+
+ Load from <P> to a temporary (<TP>) of the same type.
+
+ Compute <TP> ^ <C> in sizetype, converting types as needed.
+
+ Pointer-add <B> (to be introduced at a later point) and <I> into
+ another temporary <A>.
+
+ Increment the <*A> MEM_REF.
+
+ Store <C> >> 1 in <P>.
+
+ Temporaries used above need only be created once per function.
+
+ If any block was instrumented in a function, an initializer for <B>
+ needs to be introduced, loading it from <M> and inserting it in the
+ entry edge for the entry block.
+*/
+
+#include "../include/config.h"
+#include "../include/debug.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#ifdef likely
+ #undef likely
+#endif
+#ifdef unlikely
+ #undef unlikely
+#endif
+
+#include <list>
+#include <string>
+#include <fstream>
+
+#include <algorithm>
+#include <fnmatch.h>
+
+#include <gcc-plugin.h>
+#include <plugin-version.h>
+#include <toplev.h>
+#include <tree-pass.h>
+#include <context.h>
+#include <tree.h>
+#include <gimplify.h>
+#include <basic-block.h>
+#include <tree-ssa-alias.h>
+#include <gimple-expr.h>
+#include <gimple.h>
+#include <gimple-iterator.h>
+#include <stringpool.h>
+#include <gimple-ssa.h>
+#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) >= \
+ 60200 /* >= version 6.2.0 */
+ #include <tree-vrp.h>
+#endif
+#include <tree-ssanames.h>
+#include <tree-phinodes.h>
+#include <ssa-iterators.h>
+
+#include <intl.h>
+
+/* This plugin, being under the same license as GCC, satisfies the
+ "GPL-compatible Software" definition in the GCC RUNTIME LIBRARY
+ EXCEPTION, so it can be part of an "Eligible" "Compilation
+ Process". */
+int plugin_is_GPL_compatible = 1;
+
+namespace {
+
+static const struct pass_data afl_pass_data = {
+
+ .type = GIMPLE_PASS,
+ .name = "afl",
+ .optinfo_flags = OPTGROUP_NONE,
+ .tv_id = TV_NONE,
+ .properties_required = 0,
+ .properties_provided = 0,
+ .properties_destroyed = 0,
+ .todo_flags_start = 0,
+ .todo_flags_finish = (TODO_update_ssa | TODO_cleanup_cfg | TODO_verify_il),
+
+};
+
+struct afl_pass : gimple_opt_pass {
+
+ afl_pass(bool quiet, unsigned int ratio)
+ : gimple_opt_pass(afl_pass_data, g),
+ be_quiet(quiet),
+ debug(!!getenv("AFL_DEBUG")),
+ inst_ratio(ratio),
+#ifdef AFL_GCC_OUT_OF_LINE
+ out_of_line(!!(AFL_GCC_OUT_OF_LINE)),
+#else
+ out_of_line(getenv("AFL_GCC_OUT_OF_LINE")),
+#endif
+ neverZero(!getenv("AFL_GCC_SKIP_NEVERZERO")),
+ inst_blocks(0) {
+
+ initInstrumentList();
+
+ }
+
+ /* Are we outputting to a non-terminal, or running with AFL_QUIET
+ set? */
+ const bool be_quiet;
+
+ /* Are we running with AFL_DEBUG set? */
+ const bool debug;
+
+ /* How likely (%) is a block to be instrumented? */
+ const unsigned int inst_ratio;
+
+ /* Should we use slow, out-of-line call-based instrumentation? */
+ const bool out_of_line;
+
+ /* Should we make sure the map edge-crossing counters never wrap
+ around to zero? */
+ const bool neverZero;
+
+ /* Count instrumented blocks. */
+ int inst_blocks;
+
+ virtual unsigned int execute(function *fn) {
+
+ if (!isInInstrumentList(fn)) return 0;
+
+ int blocks = 0;
+
+ /* These are temporaries used by inline instrumentation only, that
+ are live throughout the function. */
+ tree ploc = NULL, indx = NULL, map = NULL, map_ptr = NULL, ntry = NULL,
+ cntr = NULL, xaddc = NULL, xincr = NULL;
+
+ basic_block bb;
+ FOR_EACH_BB_FN(bb, fn) {
+
+ if (!instrument_block_p(bb)) continue;
+
+ /* Generate the block identifier. */
+ unsigned bid = R(MAP_SIZE);
+ tree bidt = build_int_cst(sizetype, bid);
+
+ gimple_seq seq = NULL;
+
+ if (out_of_line) {
+
+ static tree afl_trace = get_afl_trace_decl();
+
+ /* Call __afl_trace with bid, the new location; */
+ gcall *call = gimple_build_call(afl_trace, 1, bidt);
+ gimple_seq_add_stmt(&seq, call);
+
+ } else {
+
+ static tree afl_prev_loc = get_afl_prev_loc_decl();
+ static tree afl_area_ptr = get_afl_area_ptr_decl();
+
+ /* Load __afl_prev_loc to a temporary ploc. */
+ if (blocks == 0)
+ ploc = create_tmp_var(TREE_TYPE(afl_prev_loc), ".afl_prev_loc");
+ auto load_loc = gimple_build_assign(ploc, afl_prev_loc);
+ gimple_seq_add_stmt(&seq, load_loc);
+
+ /* Compute the index into the map referenced by area_ptr
+ that we're to update: indx = (sizetype) ploc ^ bid. */
+ if (blocks == 0) indx = create_tmp_var(TREE_TYPE(bidt), ".afl_index");
+ auto conv_ploc =
+ gimple_build_assign(indx, fold_convert(TREE_TYPE(indx), ploc));
+ gimple_seq_add_stmt(&seq, conv_ploc);
+ auto xor_loc = gimple_build_assign(indx, BIT_XOR_EXPR, indx, bidt);
+ gimple_seq_add_stmt(&seq, xor_loc);
+
+ /* Compute the address of that map element. */
+ if (blocks == 0) {
+
+ map = afl_area_ptr;
+ map_ptr = create_tmp_var(TREE_TYPE(afl_area_ptr), ".afl_map_ptr");
+ ntry = create_tmp_var(TREE_TYPE(afl_area_ptr), ".afl_map_entry");
+
+ }
+
+ /* .map_ptr is initialized at the function entry point, if we
+ instrument any blocks, see below. */
+
+ /* .entry = &map_ptr[.index]; */
+ auto idx_map =
+ gimple_build_assign(ntry, POINTER_PLUS_EXPR, map_ptr, indx);
+ gimple_seq_add_stmt(&seq, idx_map);
+
+ /* Increment the counter in idx_map. */
+ tree memref = build2(MEM_REF, TREE_TYPE(TREE_TYPE(ntry)), ntry,
+ build_zero_cst(TREE_TYPE(ntry)));
+ if (blocks == 0)
+ cntr = create_tmp_var(TREE_TYPE(memref), ".afl_edge_count");
+
+ /* Load the count from the entry. */
+ auto load_cntr = gimple_build_assign(cntr, memref);
+ gimple_seq_add_stmt(&seq, load_cntr);
+
+ /* Prepare to add constant 1 to it. */
+ tree incrv = build_one_cst(TREE_TYPE(cntr));
+
+ if (neverZero) {
+
+ /* NeverZero: if count wrapped around to zero, advance to
+ one. */
+ if (blocks == 0) {
+
+ xaddc = create_tmp_var(build_complex_type(TREE_TYPE(memref)),
+ ".afl_edge_xaddc");
+ xincr = create_tmp_var(TREE_TYPE(memref), ".afl_edge_xincr");
+
+ }
+
+ /* Call the ADD_OVERFLOW builtin, to add 1 (in incrv) to
+ count. The builtin yields a complex pair: the result of
+ the add in the real part, and the overflow flag in the
+             imaginary part. */
+ auto_vec<tree> vargs(2);
+ vargs.quick_push(cntr);
+ vargs.quick_push(incrv);
+ gcall *add1_cntr =
+ gimple_build_call_internal_vec(IFN_ADD_OVERFLOW, vargs);
+ gimple_call_set_lhs(add1_cntr, xaddc);
+ gimple_seq_add_stmt(&seq, add1_cntr);
+
+ /* Extract the real part into count. */
+ tree cntrb = build1(REALPART_EXPR, TREE_TYPE(cntr), xaddc);
+ auto xtrct_cntr = gimple_build_assign(cntr, cntrb);
+ gimple_seq_add_stmt(&seq, xtrct_cntr);
+
+ /* Extract the imaginary part into xincr. */
+ tree incrb = build1(IMAGPART_EXPR, TREE_TYPE(xincr), xaddc);
+ auto xtrct_xincr = gimple_build_assign(xincr, incrb);
+ gimple_seq_add_stmt(&seq, xtrct_xincr);
+
+ /* Arrange for the add below to use the overflow flag stored
+ in xincr. */
+ incrv = xincr;
+
+ }
+
+ /* Add the increment (1 or the overflow bit) to count. */
+ auto incr_cntr = gimple_build_assign(cntr, PLUS_EXPR, cntr, incrv);
+ gimple_seq_add_stmt(&seq, incr_cntr);
+
+ /* Store count in the map entry. */
+ auto store_cntr = gimple_build_assign(unshare_expr(memref), cntr);
+ gimple_seq_add_stmt(&seq, store_cntr);
+
+ /* Store bid >> 1 in __afl_prev_loc. */
+ auto shift_loc =
+ gimple_build_assign(ploc, build_int_cst(TREE_TYPE(ploc), bid >> 1));
+ gimple_seq_add_stmt(&seq, shift_loc);
+ auto store_loc = gimple_build_assign(afl_prev_loc, ploc);
+ gimple_seq_add_stmt(&seq, store_loc);
+
+ }
+
+ /* Insert the generated sequence. */
+ gimple_stmt_iterator insp = gsi_after_labels(bb);
+ gsi_insert_seq_before(&insp, seq, GSI_SAME_STMT);
+
+ /* Bump this function's instrumented block counter. */
+ blocks++;
+
+ }
+
+ /* Aggregate the instrumented block count. */
+ inst_blocks += blocks;
+
+ if (blocks) {
+
+ if (out_of_line) return TODO_rebuild_cgraph_edges;
+
+ gimple_seq seq = NULL;
+
+ /* Load afl_area_ptr into map_ptr. We want to do this only
+ once per function. */
+ auto load_ptr = gimple_build_assign(map_ptr, map);
+ gimple_seq_add_stmt(&seq, load_ptr);
+
+ /* Insert it in the edge to the entry block. We don't want to
+ insert it in the first block, since there might be a loop
+ or a goto back to it. Insert in the edge, which may create
+ another block. */
+ edge e = single_succ_edge(ENTRY_BLOCK_PTR_FOR_FN(fn));
+ gsi_insert_seq_on_edge_immediate(e, seq);
+
+ }
+
+ return 0;
+
+ }
+
+ /* Decide whether to instrument block BB. Skip it due to the random
+ distribution, or if it's the single successor of all its
+ predecessors. */
+ inline bool instrument_block_p(basic_block bb) {
+
+ if (R(100) >= (long int)inst_ratio) return false;
+
+ edge e;
+ edge_iterator ei;
+ FOR_EACH_EDGE(e, ei, bb->preds)
+ if (!single_succ_p(e->src)) return true;
+
+ return false;
+
+ }
+
+ /* Create and return a declaration for the __afl_trace rt function. */
+ static inline tree get_afl_trace_decl() {
+
+ tree type =
+ build_function_type_list(void_type_node, uint16_type_node, NULL_TREE);
+ tree decl = build_fn_decl("__afl_trace", type);
+
+ TREE_PUBLIC(decl) = 1;
+ DECL_EXTERNAL(decl) = 1;
+ DECL_ARTIFICIAL(decl) = 1;
+
+ return decl;
+
+ }
+
+ /* Create and return a declaration for the __afl_prev_loc
+ thread-local variable. */
+ static inline tree get_afl_prev_loc_decl() {
+
+ tree decl = build_decl(BUILTINS_LOCATION, VAR_DECL,
+ get_identifier("__afl_prev_loc"), uint32_type_node);
+ TREE_PUBLIC(decl) = 1;
+ DECL_EXTERNAL(decl) = 1;
+ DECL_ARTIFICIAL(decl) = 1;
+ TREE_STATIC(decl) = 1;
+ set_decl_tls_model(
+ decl, (flag_pic ? TLS_MODEL_INITIAL_EXEC : TLS_MODEL_LOCAL_EXEC));
+ return decl;
+
+ }
+
+  /* Create and return a declaration for the __afl_area_ptr
+     global variable. */
+ static inline tree get_afl_area_ptr_decl() {
+
+ tree type = build_pointer_type(unsigned_char_type_node);
+ tree decl = build_decl(BUILTINS_LOCATION, VAR_DECL,
+ get_identifier("__afl_area_ptr"), type);
+ TREE_PUBLIC(decl) = 1;
+ DECL_EXTERNAL(decl) = 1;
+ DECL_ARTIFICIAL(decl) = 1;
+ TREE_STATIC(decl) = 1;
+
+ return decl;
+
+ }
+
+ /* This is registered as a plugin finalize callback, to print an
+ instrumentation summary unless in quiet mode. */
+ static void plugin_finalize(void *, void *p) {
+
+ opt_pass *op = (opt_pass *)p;
+ afl_pass &self = (afl_pass &)*op;
+
+ if (!self.be_quiet) {
+
+ if (!self.inst_blocks)
+ WARNF("No instrumentation targets found.");
+ else
+ OKF("Instrumented %u locations (%s mode, %s, ratio %u%%).",
+ self.inst_blocks,
+ getenv("AFL_HARDEN") ? G_("hardened") : G_("non-hardened"),
+ self.out_of_line ? G_("out of line") : G_("inline"),
+ self.inst_ratio);
+
+ }
+
+ }
+
+#define report_fatal_error(msg) BADF(msg)
+
+ std::list<std::string> allowListFiles;
+ std::list<std::string> allowListFunctions;
+ std::list<std::string> denyListFiles;
+ std::list<std::string> denyListFunctions;
+
+ /* Note: this ignore check is also called in isInInstrumentList() */
+ bool isIgnoreFunction(function *F) {
+
+ // Starting from "LLVMFuzzer" these are functions used in libfuzzer based
+ // fuzzing campaign installations, e.g. oss-fuzz
+
+ static const char *ignoreList[] = {
+
+ "asan.",
+ "llvm.",
+ "sancov.",
+ "__ubsan_",
+ "ign.",
+ "__afl_",
+ "_fini",
+ "__libc_csu",
+ "__asan",
+ "__msan",
+ "__cmplog",
+ "__sancov",
+ "msan.",
+ "LLVMFuzzer",
+ "__decide_deferred",
+ "maybe_duplicate_stderr",
+ "discard_output",
+ "close_stdout",
+ "dup_and_close_stderr",
+ "maybe_close_fd_mask",
+ "ExecuteFilesOnyByOne"
+
+ };
+
+ const char *name = IDENTIFIER_POINTER(DECL_NAME(F->decl));
+ int len = IDENTIFIER_LENGTH(DECL_NAME(F->decl));
+
+ for (auto const &ignoreListFunc : ignoreList) {
+
+ if (strncmp(name, ignoreListFunc, len) == 0) { return true; }
+
+ }
+
+ return false;
+
+ }
+
+ void initInstrumentList() {
+
+ char *allowlist = getenv("AFL_GCC_ALLOWLIST");
+ if (!allowlist) allowlist = getenv("AFL_GCC_INSTRUMENT_FILE");
+ if (!allowlist) allowlist = getenv("AFL_GCC_WHITELIST");
+ if (!allowlist) allowlist = getenv("AFL_LLVM_ALLOWLIST");
+ if (!allowlist) allowlist = getenv("AFL_LLVM_INSTRUMENT_FILE");
+ if (!allowlist) allowlist = getenv("AFL_LLVM_WHITELIST");
+ char *denylist = getenv("AFL_GCC_DENYLIST");
+ if (!denylist) denylist = getenv("AFL_GCC_BLOCKLIST");
+ if (!denylist) denylist = getenv("AFL_LLVM_DENYLIST");
+ if (!denylist) denylist = getenv("AFL_LLVM_BLOCKLIST");
+
+ if (allowlist && denylist)
+ FATAL(
+ "You can only specify either AFL_GCC_ALLOWLIST or AFL_GCC_DENYLIST "
+ "but not both!");
+
+ if (allowlist) {
+
+ std::string line;
+ std::ifstream fileStream;
+ fileStream.open(allowlist);
+ if (!fileStream) report_fatal_error("Unable to open AFL_GCC_ALLOWLIST");
+ getline(fileStream, line);
+
+ while (fileStream) {
+
+ int is_file = -1;
+ std::size_t npos;
+ std::string original_line = line;
+
+ line.erase(std::remove_if(line.begin(), line.end(), ::isspace),
+ line.end());
+
+ // remove # and following
+ if ((npos = line.find("#")) != std::string::npos)
+ line = line.substr(0, npos);
+
+ if (line.compare(0, 4, "fun:") == 0) {
+
+ is_file = 0;
+ line = line.substr(4);
+
+ } else if (line.compare(0, 9, "function:") == 0) {
+
+ is_file = 0;
+ line = line.substr(9);
+
+ } else if (line.compare(0, 4, "src:") == 0) {
+
+ is_file = 1;
+ line = line.substr(4);
+
+ } else if (line.compare(0, 7, "source:") == 0) {
+
+ is_file = 1;
+ line = line.substr(7);
+
+ }
+
+ if (line.find(":") != std::string::npos) {
+
+ FATAL("invalid line in AFL_GCC_ALLOWLIST: %s", original_line.c_str());
+
+ }
+
+ if (line.length() > 0) {
+
+ // if the entry contains / or . it must be a file
+ if (is_file == -1)
+ if (line.find("/") != std::string::npos ||
+ line.find(".") != std::string::npos)
+ is_file = 1;
+ // otherwise it is a function
+
+ if (is_file == 1)
+ allowListFiles.push_back(line);
+ else
+ allowListFunctions.push_back(line);
+ getline(fileStream, line);
+
+ }
+
+ }
+
+ if (debug)
+ SAYF(cMGN "[D] " cRST
+ "loaded allowlist with %zu file and %zu function entries\n",
+ allowListFiles.size(), allowListFunctions.size());
+
+ }
+
+ if (denylist) {
+
+ std::string line;
+ std::ifstream fileStream;
+ fileStream.open(denylist);
+ if (!fileStream) report_fatal_error("Unable to open AFL_GCC_DENYLIST");
+ getline(fileStream, line);
+
+ while (fileStream) {
+
+ int is_file = -1;
+ std::size_t npos;
+ std::string original_line = line;
+
+ line.erase(std::remove_if(line.begin(), line.end(), ::isspace),
+ line.end());
+
+ // remove # and following
+ if ((npos = line.find("#")) != std::string::npos)
+ line = line.substr(0, npos);
+
+ if (line.compare(0, 4, "fun:") == 0) {
+
+ is_file = 0;
+ line = line.substr(4);
+
+ } else if (line.compare(0, 9, "function:") == 0) {
+
+ is_file = 0;
+ line = line.substr(9);
+
+ } else if (line.compare(0, 4, "src:") == 0) {
+
+ is_file = 1;
+ line = line.substr(4);
+
+ } else if (line.compare(0, 7, "source:") == 0) {
+
+ is_file = 1;
+ line = line.substr(7);
+
+ }
+
+ if (line.find(":") != std::string::npos) {
+
+ FATAL("invalid line in AFL_GCC_DENYLIST: %s", original_line.c_str());
+
+ }
+
+ if (line.length() > 0) {
+
+ // if the entry contains / or . it must be a file
+ if (is_file == -1)
+ if (line.find("/") != std::string::npos ||
+ line.find(".") != std::string::npos)
+ is_file = 1;
+ // otherwise it is a function
+
+ if (is_file == 1)
+ denyListFiles.push_back(line);
+ else
+ denyListFunctions.push_back(line);
+ getline(fileStream, line);
+
+ }
+
+ }
+
+ if (debug)
+ SAYF(cMGN "[D] " cRST
+ "loaded denylist with %zu file and %zu function entries\n",
+ denyListFiles.size(), denyListFunctions.size());
+
+ }
+
+ }
+
+ std::string getSourceName(function *F) {
+
+ return DECL_SOURCE_FILE(F->decl);
+
+ }
+
+ bool isInInstrumentList(function *F) {
+
+ bool return_default = true;
+
+    // Is this a function with code? If it is external we don't instrument it
+    // anyway, and it can't be in the instrument file list; even if it is
+    // listed there, it is ignored.
+ if (isIgnoreFunction(F)) return false;
+
+ if (!denyListFiles.empty() || !denyListFunctions.empty()) {
+
+ if (!denyListFunctions.empty()) {
+
+ std::string instFunction = IDENTIFIER_POINTER(DECL_NAME(F->decl));
+
+ for (std::list<std::string>::iterator it = denyListFunctions.begin();
+ it != denyListFunctions.end(); ++it) {
+
+ /* We don't check for filename equality here because
+ * filenames might actually be full paths. Instead we
+ * check that the actual filename ends in the filename
+ * specified in the list. We also allow UNIX-style pattern
+ * matching */
+
+ if (instFunction.length() >= it->length()) {
+
+ if (fnmatch(("*" + *it).c_str(), instFunction.c_str(), 0) == 0) {
+
+ if (debug)
+ SAYF(cMGN "[D] " cRST
+ "Function %s is in the deny function list, "
+ "not instrumenting ... \n",
+ instFunction.c_str());
+ return false;
+
+ }
+
+ }
+
+ }
+
+ }
+
+ if (!denyListFiles.empty()) {
+
+ std::string source_file = getSourceName(F);
+
+ if (!source_file.empty()) {
+
+ for (std::list<std::string>::iterator it = denyListFiles.begin();
+ it != denyListFiles.end(); ++it) {
+
+ /* We don't check for filename equality here because
+ * filenames might actually be full paths. Instead we
+ * check that the actual filename ends in the filename
+ * specified in the list. We also allow UNIX-style pattern
+ * matching */
+
+ if (source_file.length() >= it->length()) {
+
+ if (fnmatch(("*" + *it).c_str(), source_file.c_str(), 0) == 0) {
+
+ return false;
+
+ }
+
+ }
+
+ }
+
+ } else {
+
+ // we could not find out the location. in this case we say it is not
+ // in the instrument file list
+ if (!be_quiet)
+ WARNF(
+ "No debug information found for function %s, will be "
+ "instrumented (recompile with -g -O[1-3]).",
+ IDENTIFIER_POINTER(DECL_NAME(F->decl)));
+
+ }
+
+ }
+
+ }
+
+    // if we do not have an instrument file list, return the default (true)
+ if (!allowListFiles.empty() || !allowListFunctions.empty()) {
+
+ return_default = false;
+
+ if (!allowListFunctions.empty()) {
+
+ std::string instFunction = IDENTIFIER_POINTER(DECL_NAME(F->decl));
+
+ for (std::list<std::string>::iterator it = allowListFunctions.begin();
+ it != allowListFunctions.end(); ++it) {
+
+ /* We don't check for filename equality here because
+ * filenames might actually be full paths. Instead we
+ * check that the actual filename ends in the filename
+ * specified in the list. We also allow UNIX-style pattern
+ * matching */
+
+ if (instFunction.length() >= it->length()) {
+
+ if (fnmatch(("*" + *it).c_str(), instFunction.c_str(), 0) == 0) {
+
+ if (debug)
+ SAYF(cMGN "[D] " cRST
+ "Function %s is in the allow function list, "
+ "instrumenting ... \n",
+ instFunction.c_str());
+ return true;
+
+ }
+
+ }
+
+ }
+
+ }
+
+ if (!allowListFiles.empty()) {
+
+ std::string source_file = getSourceName(F);
+
+ if (!source_file.empty()) {
+
+ for (std::list<std::string>::iterator it = allowListFiles.begin();
+ it != allowListFiles.end(); ++it) {
+
+ /* We don't check for filename equality here because
+ * filenames might actually be full paths. Instead we
+ * check that the actual filename ends in the filename
+ * specified in the list. We also allow UNIX-style pattern
+ * matching */
+
+ if (source_file.length() >= it->length()) {
+
+ if (fnmatch(("*" + *it).c_str(), source_file.c_str(), 0) == 0) {
+
+ if (debug)
+ SAYF(cMGN "[D] " cRST
+ "Function %s is in the allowlist (%s), "
+ "instrumenting ... \n",
+ IDENTIFIER_POINTER(DECL_NAME(F->decl)),
+ source_file.c_str());
+ return true;
+
+ }
+
+ }
+
+ }
+
+ } else {
+
+ // we could not find out the location. In this case we say it is not
+ // in the instrument file list
+ if (!be_quiet)
+ WARNF(
+ "No debug information found for function %s, will not be "
+ "instrumented (recompile with -g -O[1-3]).",
+ IDENTIFIER_POINTER(DECL_NAME(F->decl)));
+ return false;
+
+ }
+
+ }
+
+ }
+
+ return return_default;
+
+ }
+
+};
+
+static struct plugin_info afl_plugin = {
+
+ .version = "20200907",
+ .help = G_("AFL gcc plugin\n\
+\n\
+Set AFL_QUIET in the environment to silence it.\n\
+\n\
+Set AFL_INST_RATIO in the environment to a number from 0 to 100\n\
+to control how likely a block will be chosen for instrumentation.\n\
+\n\
+Specify -frandom-seed for reproducible instrumentation.\n\
+"),
+
+};
+
+} // namespace
+
+/* This is the function GCC calls when loading a plugin. Initialize
+ and register further callbacks. */
+int plugin_init(struct plugin_name_args * info,
+ struct plugin_gcc_version *version) {
+
+ if (!plugin_default_version_check(version, &gcc_version))
+ FATAL(G_("GCC and plugin have incompatible versions, expected GCC %d.%d"),
+ GCCPLUGIN_VERSION_MAJOR, GCCPLUGIN_VERSION_MINOR);
+
+ /* Show a banner. */
+ bool quiet = false;
+ if (isatty(2) && !getenv("AFL_QUIET"))
+ SAYF(cCYA "afl-gcc-pass " cBRI VERSION cRST " by <oliva@adacore.com>\n");
+ else
+ quiet = true;
+
+ /* Decide instrumentation ratio. */
+ int inst_ratio = 100;
+ if (char *inst_ratio_str = getenv("AFL_INST_RATIO"))
+ if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || !inst_ratio ||
+ inst_ratio > 100)
+ FATAL(G_("Bad value of AFL_INST_RATIO (must be between 1 and 100)"));
+
+ /* Initialize the random number generator with GCC's random seed, in
+ case it was specified in the command line's -frandom-seed for
+ reproducible instrumentation. */
+ srandom(get_random_seed(false));
+
+ const char *name = info->base_name;
+ register_callback(name, PLUGIN_INFO, NULL, &afl_plugin);
+
+ afl_pass * aflp = new afl_pass(quiet, inst_ratio);
+ struct register_pass_info pass_info = {
+
+ .pass = aflp,
+ .reference_pass_name = "ssa",
+ .ref_pass_instance_number = 1,
+ .pos_op = PASS_POS_INSERT_AFTER,
+
+ };
+
+ register_callback(name, PLUGIN_PASS_MANAGER_SETUP, NULL, &pass_info);
+ register_callback(name, PLUGIN_FINISH, afl_pass::plugin_finalize,
+ pass_info.pass);
+
+ if (!quiet)
+ ACTF(G_("%s instrumentation at ratio of %u%% in %s mode."),
+ aflp->out_of_line ? G_("Call-based") : G_("Inline"), inst_ratio,
+ getenv("AFL_HARDEN") ? G_("hardened") : G_("non-hardened"));
+
+ return 0;
+
+}
+
diff --git a/instrumentation/afl-llvm-common.cc b/instrumentation/afl-llvm-common.cc
new file mode 100644
index 00000000..189b4ec6
--- /dev/null
+++ b/instrumentation/afl-llvm-common.cc
@@ -0,0 +1,575 @@
+#define AFL_LLVM_PASS
+
+#include "config.h"
+#include "debug.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <fnmatch.h>
+
+#include <list>
+#include <string>
+#include <fstream>
+
+#include <llvm/Support/raw_ostream.h>
+
+#define IS_EXTERN extern
+#include "afl-llvm-common.h"
+
+using namespace llvm;
+
+static std::list<std::string> allowListFiles;
+static std::list<std::string> allowListFunctions;
+static std::list<std::string> denyListFiles;
+static std::list<std::string> denyListFunctions;
+
+/* Returns a heap-allocated (strdup'ed) printable name for a basic
+   block: the block's own label if it has one, otherwise its operand
+   representation as printed by LLVM.
+   NOTE(review): the previous strdup() result is never freed, so each
+   call leaks the prior buffer — presumably tolerated because callers
+   use the name immediately for debug output; confirm before fixing. */
+char *getBBName(const llvm::BasicBlock *BB) {
+
+  static char *name;
+
+  if (BB->getName().empty()) {
+
+    std::string       label;
+    raw_string_ostream stream(label);
+
+#if LLVM_VERSION_MAJOR >= 4 || \
+    (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7)
+    BB->printAsOperand(stream, false);
+#endif
+    name = strdup(stream.str().c_str());
+
+  } else {
+
+    name = strdup(BB->getName().str().c_str());
+
+  }
+
+  return name;
+
+}
+
+/* Function that we never instrument or analyze: sanitizer, libfuzzer
+   and AFL internals identified by their name prefixes. */
+/* Note: this ignore check is also called in isInInstrumentList() */
+bool isIgnoreFunction(const llvm::Function *F) {
+
+  // Starting from "LLVMFuzzer" these are functions used in libfuzzer based
+  // fuzzing campaign installations, e.g. oss-fuzz
+
+  static const char *ignoreList[] = {
+
+      "asan.",
+      "llvm.",
+      "sancov.",
+      "__ubsan_",
+      "ign.",
+      "__afl_",
+      "_fini",
+      "__libc_csu",
+      "__asan",
+      "__msan",
+      "__cmplog",
+      "__sancov",
+      "msan.",
+      "LLVMFuzzer",
+      "__decide_deferred",
+      "maybe_duplicate_stderr",
+      "discard_output",
+      "close_stdout",
+      "dup_and_close_stderr",
+      "maybe_close_fd_mask",
+      "ExecuteFilesOnyByOne"
+
+  };
+
+  size_t entries = sizeof(ignoreList) / sizeof(ignoreList[0]);
+
+  for (size_t idx = 0; idx < entries; ++idx) {
+
+    // prefix match: e.g. "__afl_" also covers "__afl_area_ptr"
+    if (F->getName().startswith(ignoreList[idx])) return true;
+
+  }
+
+  return false;
+
+}
+
+/* Load one allow/deny list file into the given file/function lists.
+   Lines may be prefixed with "fun:"/"function:" (function entry) or
+   "src:"/"source:" (file entry); "#" starts a comment and whitespace
+   is stripped.  Unprefixed entries containing '/' or '.' are treated
+   as files, anything else as a function.  `listname` is used only in
+   error messages.
+   Fix over the original: getline() is now the loop condition, so a
+   line that becomes empty after stripping (e.g. a comment-only line)
+   no longer causes an infinite loop — previously getline() was only
+   reached when the processed line was non-empty. */
+static void loadInstrumentListFile(const char *fname, const char *listname,
+                                   std::list<std::string> &fileList,
+                                   std::list<std::string> &funcList) {
+
+  std::string line;
+  std::ifstream fileStream;
+  fileStream.open(fname);
+  if (!fileStream)
+    report_fatal_error(std::string("Unable to open ") + listname);
+
+  while (getline(fileStream, line)) {
+
+    int is_file = -1;
+    std::size_t npos;
+    std::string original_line = line;
+
+    line.erase(std::remove_if(line.begin(), line.end(), ::isspace),
+               line.end());
+
+    // remove # and following
+    if ((npos = line.find("#")) != std::string::npos)
+      line = line.substr(0, npos);
+
+    if (line.compare(0, 4, "fun:") == 0) {
+
+      is_file = 0;
+      line = line.substr(4);
+
+    } else if (line.compare(0, 9, "function:") == 0) {
+
+      is_file = 0;
+      line = line.substr(9);
+
+    } else if (line.compare(0, 4, "src:") == 0) {
+
+      is_file = 1;
+      line = line.substr(4);
+
+    } else if (line.compare(0, 7, "source:") == 0) {
+
+      is_file = 1;
+      line = line.substr(7);
+
+    }
+
+    // a stray ':' means an unknown prefix -> user error
+    if (line.find(":") != std::string::npos)
+      FATAL("invalid line in %s: %s", listname, original_line.c_str());
+
+    if (line.length() > 0) {
+
+      // if the entry contains / or . it must be a file
+      if (is_file == -1)
+        if (line.find("/") != std::string::npos ||
+            line.find(".") != std::string::npos)
+          is_file = 1;
+      // otherwise it is a function
+
+      if (is_file == 1)
+        fileList.push_back(line);
+      else
+        funcList.push_back(line);
+
+    }
+
+  }
+
+}
+
+/* Read AFL_LLVM_ALLOWLIST / AFL_LLVM_DENYLIST (plus their legacy env
+   var aliases) and populate the module-static allow/deny lists used
+   by isInInstrumentList().  Specifying both lists at once is fatal. */
+void initInstrumentList() {
+
+  char *allowlist = getenv("AFL_LLVM_ALLOWLIST");
+  if (!allowlist) allowlist = getenv("AFL_LLVM_INSTRUMENT_FILE");
+  if (!allowlist) allowlist = getenv("AFL_LLVM_WHITELIST");
+  char *denylist = getenv("AFL_LLVM_DENYLIST");
+  if (!denylist) denylist = getenv("AFL_LLVM_BLOCKLIST");
+
+  if (allowlist && denylist)
+    FATAL(
+        "You can only specify either AFL_LLVM_ALLOWLIST or AFL_LLVM_DENYLIST "
+        "but not both!");
+
+  if (allowlist) {
+
+    loadInstrumentListFile(allowlist, "AFL_LLVM_ALLOWLIST", allowListFiles,
+                           allowListFunctions);
+
+    if (debug)
+      SAYF(cMGN "[D] " cRST
+                "loaded allowlist with %zu file and %zu function entries\n",
+           allowListFiles.size(), allowListFunctions.size());
+
+  }
+
+  if (denylist) {
+
+    loadInstrumentListFile(denylist, "AFL_LLVM_DENYLIST", denyListFiles,
+                           denyListFunctions);
+
+    if (debug)
+      SAYF(cMGN "[D] " cRST
+                "loaded denylist with %zu file and %zu function entries\n",
+           denyListFiles.size(), denyListFunctions.size());
+
+  }
+
+}
+
+/* Scan the module for functions that are dangerous to instrument —
+   ifunc resolvers and global constructors — and add their names to the
+   deny list so they are skipped later.  Resolvers/constructors run
+   before the AFL runtime is initialized, hence the exclusion.
+   No-op on LLVM < 3.9 (no ifunc support) or a NULL module. */
+void scanForDangerousFunctions(llvm::Module *M) {
+
+  if (!M) return;
+
+#if LLVM_VERSION_MAJOR > 3 || \
+    (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9)
+
+  /* Every ifunc's resolver function goes on the deny list. */
+  for (GlobalIFunc &IF : M->ifuncs()) {
+
+    StringRef ifunc_name = IF.getName();
+    Constant *r = IF.getResolver();
+    // NOTE(review): assumes the resolver constant wraps the Function as
+    // operand 0 (e.g. a bitcast); a resolver that IS a Function directly
+    // would make getOperand(0) invalid — confirm against real inputs.
+    StringRef r_name = cast<Function>(r->getOperand(0))->getName();
+    if (!be_quiet)
+      fprintf(stderr,
+              "Info: Found an ifunc with name %s that points to resolver "
+              "function %s, we will not instrument this, putting it into the "
+              "block list.\n",
+              ifunc_name.str().c_str(), r_name.str().c_str());
+    denyListFunctions.push_back(r_name.str());
+
+  }
+
+  /* Walk the llvm.global_ctors initializer list: each entry is a
+     { priority, function, ... } struct. */
+  GlobalVariable *GV = M->getNamedGlobal("llvm.global_ctors");
+  if (GV && !GV->isDeclaration() && !GV->hasLocalLinkage()) {
+
+    ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+
+    if (InitList) {
+
+      for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+
+        if (ConstantStruct *CS =
+                dyn_cast<ConstantStruct>(InitList->getOperand(i))) {
+
+          if (CS->getNumOperands() >= 2) {
+
+            if (CS->getOperand(1)->isNullValue())
+              break;  // Found a null terminator, stop here.
+
+            ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0));
+            // NOTE(review): Priority is an int but printed with %u below.
+            int Priority = CI ? CI->getSExtValue() : 0;
+
+            /* The ctor may be wrapped in a cast; unwrap to the Function. */
+            Constant *FP = CS->getOperand(1);
+            if (ConstantExpr *CE = dyn_cast<ConstantExpr>(FP))
+              if (CE->isCast()) FP = CE->getOperand(0);
+            if (Function *F = dyn_cast<Function>(FP)) {
+
+              /* AFL's own "__afl*" constructors stay instrumentable. */
+              if (!F->isDeclaration() &&
+                  strncmp(F->getName().str().c_str(), "__afl", 5) != 0) {
+
+                if (!be_quiet)
+                  fprintf(stderr,
+                          "Info: Found constructor function %s with prio "
+                          "%u, we will not instrument this, putting it into a "
+                          "block list.\n",
+                          F->getName().str().c_str(), Priority);
+                denyListFunctions.push_back(F->getName().str());
+
+              }
+
+            }
+
+          }
+
+        }
+
+      }
+
+    }
+
+  }
+
+#endif
+
+}
+
+/* Returns the source filename a function was compiled from, derived
+   from the debug location of its entry block's first insertion point
+   (falling back to the inlined-at location), or "" when no debug info
+   is available.
+   Fix over the original: when dyn_cast<DILocation> fails, cDILoc is
+   NULL and the old code still dereferenced it via getInlinedAt() on
+   the empty-filename path — now guarded.  The unused IRBuilder local
+   was also dropped. */
+static std::string getSourceName(llvm::Function *F) {
+
+  // let's try to get the filename for the function
+  auto bb = &F->getEntryBlock();
+  BasicBlock::iterator IP = bb->getFirstInsertionPt();
+  DebugLoc Loc = IP->getDebugLoc();
+
+#if LLVM_VERSION_MAJOR >= 4 || \
+    (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7)
+  if (Loc) {
+
+    StringRef instFilename;
+    DILocation *cDILoc = dyn_cast<DILocation>(Loc.getAsMDNode());
+
+    if (cDILoc) { instFilename = cDILoc->getFilename(); }
+
+    if (cDILoc && instFilename.str().empty()) {
+
+      /* If the original location is empty, try using the inlined location
+       */
+      DILocation *oDILoc = cDILoc->getInlinedAt();
+      if (oDILoc) { instFilename = oDILoc->getFilename(); }
+
+    }
+
+    return instFilename.str();
+
+  }
+
+#else
+  if (!Loc.isUnknown()) {
+
+    DILocation cDILoc(Loc.getAsMDNode(F->getContext()));
+
+    StringRef instFilename = cDILoc.getFilename();
+
+    /* Continue only if we know where we actually are */
+    return instFilename.str();
+
+  }
+
+#endif
+
+  return std::string("");
+
+}
+
+/* Returns true if `subject` matches any entry in `entries`.  We don't
+   check for equality because list entries might be bare filenames
+   while the subject can be a full path; instead we match the entry as
+   a suffix with UNIX-style wildcards (fnmatch with an implicit
+   leading "*").  Factored out of isInInstrumentList(), where the same
+   loop appeared four times. */
+static bool matchesAnyListEntry(const std::list<std::string> &entries,
+                                const std::string &subject) {
+
+  for (std::list<std::string>::const_iterator it = entries.begin();
+       it != entries.end(); ++it) {
+
+    if (subject.length() >= it->length() &&
+        fnmatch(("*" + *it).c_str(), subject.c_str(), 0) == 0)
+      return true;
+
+  }
+
+  return false;
+
+}
+
+/* Decide whether function F should be instrumented, honoring the
+   allow/deny lists loaded by initInstrumentList().  Deny entries win
+   first; with an allowlist present the default flips to "do not
+   instrument" unless an entry matches.  Functions without a body and
+   isIgnoreFunction() matches are never instrumented.  Note the
+   asymmetry when debug info is missing: under a denylist the function
+   IS instrumented (with a warning), under an allowlist it is NOT. */
+bool isInInstrumentList(llvm::Function *F) {
+
+  bool return_default = true;
+
+  // is this a function with code? If it is external we don't instrument it
+  // anyway and it can't be in the instrument file list. Or if it is it is
+  // ignored.
+  if (!F->size() || isIgnoreFunction(F)) return false;
+
+  if (!denyListFiles.empty() || !denyListFunctions.empty()) {
+
+    if (!denyListFunctions.empty()) {
+
+      std::string instFunction = F->getName().str();
+
+      if (matchesAnyListEntry(denyListFunctions, instFunction)) {
+
+        if (debug)
+          SAYF(cMGN "[D] " cRST
+                    "Function %s is in the deny function list, "
+                    "not instrumenting ... \n",
+               instFunction.c_str());
+        return false;
+
+      }
+
+    }
+
+    if (!denyListFiles.empty()) {
+
+      std::string source_file = getSourceName(F);
+
+      if (!source_file.empty()) {
+
+        if (matchesAnyListEntry(denyListFiles, source_file)) return false;
+
+      } else {
+
+        // we could not find out the location. in this case we say it is not
+        // in the instrument file list
+        if (!be_quiet)
+          WARNF(
+              "No debug information found for function %s, will be "
+              "instrumented (recompile with -g -O[1-3]).",
+              F->getName().str().c_str());
+
+      }
+
+    }
+
+  }
+
+  // if we do not have a instrument file list return true
+  if (!allowListFiles.empty() || !allowListFunctions.empty()) {
+
+    return_default = false;
+
+    if (!allowListFunctions.empty()) {
+
+      std::string instFunction = F->getName().str();
+
+      if (matchesAnyListEntry(allowListFunctions, instFunction)) {
+
+        if (debug)
+          SAYF(cMGN "[D] " cRST
+                    "Function %s is in the allow function list, "
+                    "instrumenting ... \n",
+               instFunction.c_str());
+        return true;
+
+      }
+
+    }
+
+    if (!allowListFiles.empty()) {
+
+      std::string source_file = getSourceName(F);
+
+      if (!source_file.empty()) {
+
+        if (matchesAnyListEntry(allowListFiles, source_file)) {
+
+          if (debug)
+            SAYF(cMGN "[D] " cRST
+                      "Function %s is in the allowlist (%s), "
+                      "instrumenting ... \n",
+                 F->getName().str().c_str(), source_file.c_str());
+          return true;
+
+        }
+
+      } else {
+
+        // we could not find out the location. In this case we say it is not
+        // in the instrument file list
+        if (!be_quiet)
+          WARNF(
+              "No debug information found for function %s, will not be "
+              "instrumented (recompile with -g -O[1-3]).",
+              F->getName().str().c_str());
+        return false;
+
+      }
+
+    }
+
+  }
+
+  return return_default;
+
+}
+
+// Calculate the expected number of edge collisions if all location IDs
+// were assigned at random (as classic afl/afl++ does), using the
+// "balls into bins" model: with `edges` balls thrown into MAP_SIZE
+// bins, the expected number of empty bins is
+//   MAP_SIZE * (1 - 1/MAP_SIZE)^edges
+// and every edge beyond the number of occupied bins is a collision.
+unsigned long long int calculateCollisions(uint32_t edges) {
+
+  double bins = MAP_SIZE;
+  double balls = edges;
+  unsigned long long int empty = round(bins * pow(1 - (1 / bins), balls));
+  unsigned long long int occupied = MAP_SIZE - empty;
+  return edges - occupied;
+
+}
+
diff --git a/instrumentation/afl-llvm-common.h b/instrumentation/afl-llvm-common.h
new file mode 100644
index 00000000..a1561d9c
--- /dev/null
+++ b/instrumentation/afl-llvm-common.h
@@ -0,0 +1,52 @@
+#ifndef __AFLLLVMCOMMON_H
+#define __AFLLLVMCOMMON_H
+
+/* Shared helpers for the AFL++ LLVM instrumentation passes:
+   allow/deny instrument lists, ignore checks, BB naming and
+   collision estimation.  Implemented in afl-llvm-common.cc. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <list>
+#include <string>
+#include <fstream>
+#include <sys/time.h>
+
+#include "llvm/Config/llvm-config.h"
+#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 5
+typedef long double max_align_t;
+#endif
+
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+
+#if LLVM_VERSION_MAJOR > 3 || \
+    (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4)
+  #include "llvm/IR/DebugInfo.h"
+  #include "llvm/IR/CFG.h"
+#else
+  #include "llvm/DebugInfo.h"
+  #include "llvm/Support/CFG.h"
+#endif
+
+/* Returns a strdup'ed printable name for a basic block (caller does
+   not free; the implementation keeps the pointer in a static). */
+char * getBBName(const llvm::BasicBlock *BB);
+/* True for functions that must never be instrumented (sanitizer,
+   libfuzzer and AFL internals). */
+bool isIgnoreFunction(const llvm::Function *F);
+/* Load AFL_LLVM_ALLOWLIST / AFL_LLVM_DENYLIST into internal lists. */
+void initInstrumentList();
+/* True if F should be instrumented per the loaded lists. */
+bool isInInstrumentList(llvm::Function *F);
+/* Expected random-assignment collisions for `edges` edges. */
+unsigned long long int calculateCollisions(uint32_t edges);
+/* Put ifunc resolvers and global ctors on the deny list. */
+void scanForDangerousFunctions(llvm::Module *M);
+
+/* Declared extern everywhere except in afl-llvm-common.cc, which
+   defines IS_EXTERN (empty) before including this header. */
+#ifndef IS_EXTERN
+  #define IS_EXTERN
+#endif
+
+IS_EXTERN int debug;
+IS_EXTERN int be_quiet;
+
+#undef IS_EXTERN
+
+#endif
+
diff --git a/instrumentation/afl-llvm-dict2file.so.cc b/instrumentation/afl-llvm-dict2file.so.cc
new file mode 100644
index 00000000..bd8eb27a
--- /dev/null
+++ b/instrumentation/afl-llvm-dict2file.so.cc
@@ -0,0 +1,615 @@
+/*
+ american fuzzy lop++ - LLVM LTO instrumentation pass
+ ----------------------------------------------------
+
+ Written by Marc Heuse <mh@mh-sec.de>
+
+ Copyright 2019-2020 AFLplusplus Project. All rights reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at:
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ This library is plugged into LLVM when invoking clang through afl-clang-lto.
+
+ */
+
+#define AFL_LLVM_PASS
+
+#include "config.h"
+#include "debug.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <ctype.h>
+
+#include <list>
+#include <string>
+#include <fstream>
+#include <set>
+
+#include "llvm/Config/llvm-config.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Pass.h"
+#include "llvm/IR/Constants.h"
+
+#include "afl-llvm-common.h"
+
+#ifndef O_DSYNC
+ #define O_DSYNC O_SYNC
+#endif
+
+using namespace llvm;
+
+namespace {
+
+/* Legacy-PM module pass that extracts compare-instruction constants
+   and string/memory-compare literals into an AFL dictionary file
+   (see runOnModule below). */
+class AFLdict2filePass : public ModulePass {
+
+ public:
+  static char ID;
+
+  /* AFL_DEBUG in the environment enables verbose debug output. */
+  AFLdict2filePass() : ModulePass(ID) {
+
+    if (getenv("AFL_DEBUG")) debug = 1;
+
+  }
+
+  bool runOnModule(Module &M) override;
+
+};
+
+} // namespace
+
+/* Append one token of `len` bytes from `mem` to the dictionary file
+   `fd` as a quoted, C-escaped line ("foo\x00\n").  Printable bytes are
+   copied verbatim, everything else becomes \xNN; a single trailing NUL
+   of a text token is dropped (unless the token is binary or a 4/8 byte
+   integer).  Aborts via PFATAL() if the write fails. */
+void dict2file(int fd, u8 *mem, u32 len) {
+
+  u32 i, j, binary = 0;
+  char line[MAX_AUTO_EXTRA * 8], tmp[8];
+
+  /* Guard the fixed-size buffer: each input byte can expand to four
+     characters ("\xNN"), so reject anything the callers' clamping
+     should already prevent but a future caller might not. */
+  if (!mem || len > MAX_AUTO_EXTRA) return;
+
+  strcpy(line, "\"");
+  j = 1;
+  for (i = 0; i < len; i++) {
+
+    if (isprint(mem[i])) {
+
+      line[j++] = mem[i];
+
+    } else {
+
+      /* Skip a lone trailing NUL of a pure-text token; everything else
+         is escaped and marks the token as binary. */
+      if (i + 1 != len || mem[i] != 0 || binary || len == 4 || len == 8) {
+
+        line[j] = 0;
+        sprintf(tmp, "\\x%02x", (u8)mem[i]);
+        strcat(line, tmp);
+        j = strlen(line);
+
+      }
+
+      binary = 1;
+
+    }
+
+  }
+
+  line[j] = 0;
+  strcat(line, "\"\n");
+  if (write(fd, line, strlen(line)) <= 0)
+    PFATAL("Could not write to dictionary file");
+  fsync(fd);
+
+  if (!be_quiet) fprintf(stderr, "Found dictionary token: %s", line);
+
+}
+
+/* Walk every function in the module and write likely-useful fuzzing
+   tokens to the file named by AFL_LLVM_DICT2FILE (must be an absolute
+   path): integer compare constants (plus their +/-1 neighbors where
+   the predicate makes that interesting) and the constant side of
+   string/memory compare calls.
+   Fix over the original: the sign-bit masks used to detect negative
+   constants were DECIMAL literals (80000000 / 8000000000000000)
+   instead of hex, so the wrong bits were tested; corrected to
+   0x80000000 / 0x8000000000000000 in both switch arms. */
+bool AFLdict2filePass::runOnModule(Module &M) {
+
+  DenseMap<Value *, std::string *> valueMap;
+  char * ptr;
+  int fd, found = 0;
+
+  /* Show a banner */
+  setvbuf(stdout, NULL, _IONBF, 0);
+
+  if ((isatty(2) && !getenv("AFL_QUIET")) || debug) {
+
+    SAYF(cCYA "afl-llvm-dict2file" VERSION cRST
+              " by Marc \"vanHauser\" Heuse <mh@mh-sec.de>\n");
+
+  } else
+
+    be_quiet = 1;
+
+  scanForDangerousFunctions(&M);
+
+  ptr = getenv("AFL_LLVM_DICT2FILE");
+
+  if (!ptr || *ptr != '/')
+    FATAL("AFL_LLVM_DICT2FILE is not set to an absolute path: %s", ptr);
+
+  if ((fd = open(ptr, O_WRONLY | O_APPEND | O_CREAT | O_DSYNC, 0644)) < 0)
+    PFATAL("Could not open/create %s.", ptr);
+
+  /* Instrument all the things! */
+
+  for (auto &F : M) {
+
+    if (isIgnoreFunction(&F)) continue;
+
+    /* Some implementation notes.
+     *
+     * We try to handle 3 cases:
+     * - memcmp("foo", arg, 3) <- literal string
+     * - static char globalvar[] = "foo";
+     *   memcmp(globalvar, arg, 3) <- global variable
+     * - char localvar[] = "foo";
+     *   memcmp(locallvar, arg, 3) <- local variable
+     *
+     * The local variable case is the hardest. We can only detect that
+     * case if there is no reassignment or change in the variable.
+     * And it might not work across llvm version.
+     * What we do is hooking the initializer function for local variables
+     * (llvm.memcpy.p0i8.p0i8.i64) and note the string and the assigned
+     * variable. And if that variable is then used in a compare function
+     * we use that noted string.
+     * This seems not to work for tokens that have a size <= 4 :-(
+     *
+     * - if the compared length is smaller than the string length we
+     *   save the full string. This is likely better for fuzzing but
+     *   might be wrong in a few cases depending on optimizers
+     *
+     * - not using StringRef because there is a bug in the llvm 11
+     *   checkout I am using which sometimes points to wrong strings
+     *
+     * Over and out. Took me a full day. damn. mh/vh
+     */
+
+    for (auto &BB : F) {
+
+      for (auto &IN : BB) {
+
+        CallInst *callInst = nullptr;
+        CmpInst * cmpInst = nullptr;
+
+        if ((cmpInst = dyn_cast<CmpInst>(&IN))) {
+
+          Value * op = cmpInst->getOperand(1);
+          ConstantInt *ilen = dyn_cast<ConstantInt>(op);
+
+          /* We skip > 64 bit integers. why? first because their value is
+             difficult to obtain, and second because clang does not support
+             literals > 64 bit (as of llvm 12) */
+
+          if (ilen && ilen->uge(0xffffffffffffffff) == false) {
+
+            u64 val2 = 0, val = ilen->getZExtValue();
+            u32 len = 0;
+            if (val > 0x10000 && val < 0xffffffff) len = 4;
+            if (val > 0x100000001 && val < 0xffffffffffffffff) len = 8;
+
+            if (len) {
+
+              auto c = cmpInst->getPredicate();
+
+              /* Also emit an off-by-one neighbor (val2) when the
+                 predicate is an ordering compare, so the fuzzer can hit
+                 both sides of the boundary. */
+              switch (c) {
+
+                case CmpInst::FCMP_OGT:  // fall through
+                case CmpInst::FCMP_OLE:  // fall through
+                case CmpInst::ICMP_SLE:  // fall through
+                case CmpInst::ICMP_SGT:
+
+                  // signed comparison and it is a negative constant
+                  // (masks must be hex: 0x80000000 is the 32 bit sign bit)
+                  if ((len == 4 && (val & 0x80000000)) ||
+                      (len == 8 && (val & 0x8000000000000000))) {
+
+                    if ((val & 0xffff) != 1) val2 = val - 1;
+                    break;
+
+                  }
+
+                  // fall through
+
+                case CmpInst::FCMP_UGT:  // fall through
+                case CmpInst::FCMP_ULE:  // fall through
+                case CmpInst::ICMP_UGT:  // fall through
+                case CmpInst::ICMP_ULE:
+                  if ((val & 0xffff) != 0xfffe) val2 = val + 1;
+                  break;
+
+                case CmpInst::FCMP_OLT:  // fall through
+                case CmpInst::FCMP_OGE:  // fall through
+                case CmpInst::ICMP_SLT:  // fall through
+                case CmpInst::ICMP_SGE:
+
+                  // signed comparison and it is a negative constant
+                  if ((len == 4 && (val & 0x80000000)) ||
+                      (len == 8 && (val & 0x8000000000000000))) {
+
+                    if ((val & 0xffff) != 1) val2 = val - 1;
+                    break;
+
+                  }
+
+                  // fall through
+
+                case CmpInst::FCMP_ULT:  // fall through
+                case CmpInst::FCMP_UGE:  // fall through
+                case CmpInst::ICMP_ULT:  // fall through
+                case CmpInst::ICMP_UGE:
+                  if ((val & 0xffff) != 1) val2 = val - 1;
+                  break;
+
+                default:
+                  val2 = 0;
+
+              }
+
+              dict2file(fd, (u8 *)&val, len);
+              found++;
+              if (val2) {
+
+                dict2file(fd, (u8 *)&val2, len);
+                found++;
+
+              }
+
+            }
+
+          }
+
+        }
+
+        if ((callInst = dyn_cast<CallInst>(&IN))) {
+
+          bool isStrcmp = true;
+          bool isMemcmp = true;
+          bool isStrncmp = true;
+          bool isStrcasecmp = true;
+          bool isStrncasecmp = true;
+          bool isIntMemcpy = true;
+          bool isStdString = true;
+          bool addedNull = false;
+          size_t optLen = 0;
+
+          Function *Callee = callInst->getCalledFunction();
+          if (!Callee) continue;
+          if (callInst->getCallingConv() != llvm::CallingConv::C) continue;
+          std::string FuncName = Callee->getName().str();
+          isStrcmp &= !FuncName.compare("strcmp");
+          isMemcmp &=
+              (!FuncName.compare("memcmp") || !FuncName.compare("bcmp"));
+          isStrncmp &= !FuncName.compare("strncmp");
+          isStrcasecmp &= !FuncName.compare("strcasecmp");
+          isStrncasecmp &= !FuncName.compare("strncasecmp");
+          isIntMemcpy &= !FuncName.compare("llvm.memcpy.p0i8.p0i8.i64");
+          isStdString &= ((FuncName.find("basic_string") != std::string::npos &&
+                           FuncName.find("compare") != std::string::npos) ||
+                          (FuncName.find("basic_string") != std::string::npos &&
+                           FuncName.find("find") != std::string::npos));
+
+          if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp &&
+              !isStrncasecmp && !isIntMemcpy && !isStdString)
+            continue;
+
+          /* Verify the strcmp/memcmp/strncmp/strcasecmp/strncasecmp function
+           * prototype */
+          FunctionType *FT = Callee->getFunctionType();
+
+          isStrcmp &=
+              FT->getNumParams() == 2 && FT->getReturnType()->isIntegerTy(32) &&
+              FT->getParamType(0) == FT->getParamType(1) &&
+              FT->getParamType(0) == IntegerType::getInt8PtrTy(M.getContext());
+          isStrcasecmp &=
+              FT->getNumParams() == 2 && FT->getReturnType()->isIntegerTy(32) &&
+              FT->getParamType(0) == FT->getParamType(1) &&
+              FT->getParamType(0) == IntegerType::getInt8PtrTy(M.getContext());
+          isMemcmp &= FT->getNumParams() == 3 &&
+                      FT->getReturnType()->isIntegerTy(32) &&
+                      FT->getParamType(0)->isPointerTy() &&
+                      FT->getParamType(1)->isPointerTy() &&
+                      FT->getParamType(2)->isIntegerTy();
+          isStrncmp &= FT->getNumParams() == 3 &&
+                       FT->getReturnType()->isIntegerTy(32) &&
+                       FT->getParamType(0) == FT->getParamType(1) &&
+                       FT->getParamType(0) ==
+                           IntegerType::getInt8PtrTy(M.getContext()) &&
+                       FT->getParamType(2)->isIntegerTy();
+          isStrncasecmp &= FT->getNumParams() == 3 &&
+                           FT->getReturnType()->isIntegerTy(32) &&
+                           FT->getParamType(0) == FT->getParamType(1) &&
+                           FT->getParamType(0) ==
+                               IntegerType::getInt8PtrTy(M.getContext()) &&
+                           FT->getParamType(2)->isIntegerTy();
+          isStdString &= FT->getNumParams() >= 2 &&
+                         FT->getParamType(0)->isPointerTy() &&
+                         FT->getParamType(1)->isPointerTy();
+
+          if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp &&
+              !isStrncasecmp && !isIntMemcpy && !isStdString)
+            continue;
+
+          /* is a str{n,}{case,}cmp/memcmp, check if we have
+           * str{case,}cmp(x, "const") or str{case,}cmp("const", x)
+           * strn{case,}cmp(x, "const", ..) or strn{case,}cmp("const", x, ..)
+           * memcmp(x, "const", ..) or memcmp("const", x, ..) */
+          Value *Str1P = callInst->getArgOperand(0),
+                *Str2P = callInst->getArgOperand(1);
+          std::string Str1, Str2;
+          StringRef TmpStr;
+          bool HasStr1 = getConstantStringInfo(Str1P, TmpStr);
+          if (TmpStr.empty()) {
+
+            HasStr1 = false;
+
+          } else {
+
+            HasStr1 = true;
+            Str1 = TmpStr.str();
+
+          }
+
+          bool HasStr2 = getConstantStringInfo(Str2P, TmpStr);
+          if (TmpStr.empty()) {
+
+            HasStr2 = false;
+
+          } else {
+
+            HasStr2 = true;
+            Str2 = TmpStr.str();
+
+          }
+
+          if (debug)
+            fprintf(stderr, "F:%s %p(%s)->\"%s\"(%s) %p(%s)->\"%s\"(%s)\n",
+                    FuncName.c_str(), (void *)Str1P,
+                    Str1P->getName().str().c_str(), Str1.c_str(),
+                    HasStr1 == true ? "true" : "false", (void *)Str2P,
+                    Str2P->getName().str().c_str(), Str2.c_str(),
+                    HasStr2 == true ? "true" : "false");
+
+          // we handle the 2nd parameter first because of llvm memcpy
+          if (!HasStr2) {
+
+            auto *Ptr = dyn_cast<ConstantExpr>(Str2P);
+            if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) {
+
+              if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) {
+
+                if (Var->hasInitializer()) {
+
+                  if (auto *Array =
+                          dyn_cast<ConstantDataArray>(Var->getInitializer())) {
+
+                    HasStr2 = true;
+                    Str2 = Array->getAsString().str();
+
+                  }
+
+                }
+
+              }
+
+            }
+
+          }
+
+          // for the internal memcpy routine we only care for the second
+          // parameter and are not reporting anything.
+          if (isIntMemcpy == true) {
+
+            if (HasStr2 == true) {
+
+              Value * op2 = callInst->getArgOperand(2);
+              ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
+              if (ilen) {
+
+                uint64_t literalLength = Str2.size();
+                uint64_t optLength = ilen->getZExtValue();
+                if (literalLength + 1 == optLength) {
+
+                  Str2.append("\0", 1);  // add null byte
+                  addedNull = true;
+
+                }
+
+              }
+
+              valueMap[Str1P] = new std::string(Str2);
+
+              if (debug)
+                fprintf(stderr, "Saved: %s for %p\n", Str2.c_str(),
+                        (void *)Str1P);
+              continue;
+
+            }
+
+            continue;
+
+          }
+
+          // Neither a literal nor a global variable?
+          // maybe it is a local variable that we saved
+          if (!HasStr2) {
+
+            std::string *strng = valueMap[Str2P];
+            if (strng && !strng->empty()) {
+
+              Str2 = *strng;
+              HasStr2 = true;
+              if (debug)
+                fprintf(stderr, "Filled2: %s for %p\n", strng->c_str(),
+                        (void *)Str2P);
+
+            }
+
+          }
+
+          if (!HasStr1) {
+
+            auto Ptr = dyn_cast<ConstantExpr>(Str1P);
+
+            if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) {
+
+              if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) {
+
+                if (Var->hasInitializer()) {
+
+                  if (auto *Array =
+                          dyn_cast<ConstantDataArray>(Var->getInitializer())) {
+
+                    HasStr1 = true;
+                    Str1 = Array->getAsString().str();
+
+                  }
+
+                }
+
+              }
+
+            }
+
+          }
+
+          // Neither a literal nor a global variable?
+          // maybe it is a local variable that we saved
+          if (!HasStr1) {
+
+            std::string *strng = valueMap[Str1P];
+            if (strng && !strng->empty()) {
+
+              Str1 = *strng;
+              HasStr1 = true;
+              if (debug)
+                fprintf(stderr, "Filled1: %s for %p\n", strng->c_str(),
+                        (void *)Str1P);
+
+            }
+
+          }
+
+          /* handle cases of one string is const, one string is variable */
+          if (!(HasStr1 ^ HasStr2)) continue;
+
+          std::string thestring;
+
+          if (HasStr1)
+            thestring = Str1;
+          else
+            thestring = Str2;
+
+          optLen = thestring.length();
+
+          if (isMemcmp || isStrncmp || isStrncasecmp) {
+
+            Value * op2 = callInst->getArgOperand(2);
+            ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
+            if (ilen) {
+
+              uint64_t literalLength = optLen;
+              optLen = ilen->getZExtValue();
+              if (literalLength + 1 == optLen) {  // add null byte
+                thestring.append("\0", 1);
+                addedNull = true;
+
+              }
+
+            }
+
+          }
+
+          // add null byte if this is a string compare function and a null
+          // was not already added
+          if (!isMemcmp) {
+
+            if (addedNull == false) {
+
+              thestring.append("\0", 1);  // add null byte
+              optLen++;
+
+            }
+
+            // ensure we do not have garbage
+            size_t offset = thestring.find('\0', 0);
+            if (offset + 1 < optLen) optLen = offset + 1;
+            thestring = thestring.substr(0, optLen);
+
+          }
+
+          // we take the longer string, even if the compare was to a
+          // shorter part. Note that depending on the optimizer of the
+          // compiler this can be wrong, but it is more likely that this
+          // is helping the fuzzer
+          if (optLen != thestring.length()) optLen = thestring.length();
+          if (optLen > MAX_AUTO_EXTRA) optLen = MAX_AUTO_EXTRA;
+          if (optLen < 3)  // too short? skip
+            continue;
+
+          ptr = (char *)thestring.c_str();
+
+          dict2file(fd, (u8 *)ptr, optLen);
+          found++;
+
+        }
+
+      }
+
+    }
+
+  }
+
+  close(fd);
+
+  /* Say something nice. */
+
+  if (!be_quiet) {
+
+    if (!found)
+      OKF("No entries for a dictionary found.");
+    else
+      OKF("Wrote %d entries to the dictionary file.\n", found);
+
+  }
+
+  return true;
+
+}
+
+char AFLdict2filePass::ID = 0;
+
+/* Callback that appends the dict2file pass to a legacy pass manager
+   pipeline. */
+static void registerAFLdict2filePass(const PassManagerBuilder &,
+                                     legacy::PassManagerBase &PM) {
+
+  PM.add(new AFLdict2filePass());
+
+}
+
+/* Makes the pass available as "-afl-dict2file" on the opt command
+   line (not a CFG-only pass, not an analysis pass). */
+static RegisterPass<AFLdict2filePass> X("afl-dict2file",
+                                        "afl++ dict2file instrumentation pass",
+                                        false, false);
+
+/* Auto-register at the end of the optimizer pipeline ... */
+static RegisterStandardPasses RegisterAFLdict2filePass(
+    PassManagerBuilder::EP_OptimizerLast, registerAFLdict2filePass);
+
+/* ... and also when compiling at -O0. */
+static RegisterStandardPasses RegisterAFLdict2filePass0(
+    PassManagerBuilder::EP_EnabledOnOptLevel0, registerAFLdict2filePass);
+
diff --git a/instrumentation/afl-llvm-lto-instrumentation.so.cc b/instrumentation/afl-llvm-lto-instrumentation.so.cc
new file mode 100644
index 00000000..9e026e57
--- /dev/null
+++ b/instrumentation/afl-llvm-lto-instrumentation.so.cc
@@ -0,0 +1,1060 @@
+/*
+ american fuzzy lop++ - LLVM LTO instrumentation pass
+ ----------------------------------------------------
+
+ Written by Marc Heuse <mh@mh-sec.de>
+
+ Copyright 2019-2020 AFLplusplus Project. All rights reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at:
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ This library is plugged into LLVM when invoking clang through afl-clang-lto.
+
+ */
+
+#define AFL_LLVM_PASS
+
+#include "config.h"
+#include "debug.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/time.h>
+
+#include <list>
+#include <string>
+#include <fstream>
+#include <set>
+#include <iostream>
+
+#include "llvm/Config/llvm-config.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Pass.h"
+#include "llvm/IR/Constants.h"
+
+#include "afl-llvm-common.h"
+
+using namespace llvm;
+
+namespace {
+
/* Legacy-PM module pass that performs collision-free ("LTO") coverage
   instrumentation over the whole program at link time, and additionally
   harvests an auto-dictionary from compare instructions and string/memory
   compare calls.  Plugged in by afl-clang-lto. */
class AFLLTOPass : public ModulePass {

 public:
  static char ID;  // pass registry identifier (address is the key)

  AFLLTOPass() : ModulePass(ID) {

    char *ptr;

    // AFL_DEBUG turns on verbose diagnostics in all afl++ passes
    if (getenv("AFL_DEBUG")) debug = 1;
    // optional override for the first edge ID to assign
    if ((ptr = getenv("AFL_LLVM_LTO_STARTID")) != NULL)
      if ((afl_global_id = atoi(ptr)) < 0 || afl_global_id >= MAP_SIZE)
        FATAL("AFL_LLVM_LTO_STARTID value of \"%s\" is not between 0 and %d\n",
              ptr, MAP_SIZE - 1);

    // when set, counters may wrap to zero (skip the never-zero trick)
    skip_nozero = getenv("AFL_LLVM_SKIP_NEVERZERO");

  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {

    ModulePass::getAnalysisUsage(AU);
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();

  }

  bool runOnModule(Module &M) override;

 protected:
  // next edge ID to hand out / whether auto-dictionary collection is on
  int afl_global_id = 1, autodictionary = 1;
  // functions with fewer basic blocks than this are not instrumented
  uint32_t function_minimum_size = 1;
  // counters for the final statistics report
  uint32_t inst_blocks = 0, inst_funcs = 0, total_instr = 0;
  // fixed shared-map address; 0 selects the dynamic map (the default)
  uint64_t map_addr = 0x10000;
  char *   skip_nozero = NULL;  // non-NULL if AFL_LLVM_SKIP_NEVERZERO is set

};
+
+} // namespace
+
+bool AFLLTOPass::runOnModule(Module &M) {
+
+ LLVMContext & C = M.getContext();
+ std::vector<std::string> dictionary;
+ std::vector<CallInst *> calls;
+ DenseMap<Value *, std::string *> valueMap;
+ std::vector<BasicBlock *> BlockList;
+ char * ptr;
+ FILE * documentFile = NULL;
+ size_t found = 0;
+
+ srand((unsigned int)time(NULL));
+
+ unsigned long long int moduleID =
+ (((unsigned long long int)(rand() & 0xffffffff)) << 32) | getpid();
+
+ IntegerType *Int8Ty = IntegerType::getInt8Ty(C);
+ IntegerType *Int32Ty = IntegerType::getInt32Ty(C);
+ IntegerType *Int64Ty = IntegerType::getInt64Ty(C);
+
+ /* Show a banner */
+ setvbuf(stdout, NULL, _IONBF, 0);
+
+ if ((isatty(2) && !getenv("AFL_QUIET")) || debug) {
+
+ SAYF(cCYA "afl-llvm-lto" VERSION cRST
+ " by Marc \"vanHauser\" Heuse <mh@mh-sec.de>\n");
+
+ } else
+
+ be_quiet = 1;
+
+ if ((ptr = getenv("AFL_LLVM_DOCUMENT_IDS")) != NULL) {
+
+ if ((documentFile = fopen(ptr, "a")) == NULL)
+ WARNF("Cannot access document file %s", ptr);
+
+ }
+
+ // we make this the default as the fixed map has problems with
+ // defered forkserver, early constructors, ifuncs and maybe more
+ /*if (getenv("AFL_LLVM_MAP_DYNAMIC"))*/
+ map_addr = 0;
+
+ if ((ptr = getenv("AFL_LLVM_MAP_ADDR"))) {
+
+ uint64_t val;
+ if (!*ptr || !strcmp(ptr, "0") || !strcmp(ptr, "0x0")) {
+
+ map_addr = 0;
+
+ } else if (getenv("AFL_LLVM_MAP_DYNAMIC")) {
+
+ FATAL(
+ "AFL_LLVM_MAP_ADDR and AFL_LLVM_MAP_DYNAMIC cannot be used together");
+
+ } else if (strncmp(ptr, "0x", 2) != 0) {
+
+ map_addr = 0x10000; // the default
+
+ } else {
+
+ val = strtoull(ptr, NULL, 16);
+ if (val < 0x100 || val > 0xffffffff00000000) {
+
+ FATAL(
+ "AFL_LLVM_MAP_ADDR must be a value between 0x100 and "
+ "0xffffffff00000000");
+
+ }
+
+ map_addr = val;
+
+ }
+
+ }
+
+ if (debug) { fprintf(stderr, "map address is 0x%lx\n", map_addr); }
+
+ /* Get/set the globals for the SHM region. */
+
+ GlobalVariable *AFLMapPtr = NULL;
+ Value * MapPtrFixed = NULL;
+
+ if (!map_addr) {
+
+ AFLMapPtr =
+ new GlobalVariable(M, PointerType::get(Int8Ty, 0), false,
+ GlobalValue::ExternalLinkage, 0, "__afl_area_ptr");
+
+ } else {
+
+ ConstantInt *MapAddr = ConstantInt::get(Int64Ty, map_addr);
+ MapPtrFixed =
+ ConstantExpr::getIntToPtr(MapAddr, PointerType::getUnqual(Int8Ty));
+
+ }
+
+ ConstantInt *Zero = ConstantInt::get(Int8Ty, 0);
+ ConstantInt *One = ConstantInt::get(Int8Ty, 1);
+
+ // This dumps all inialized global strings - might be useful in the future
+ /*
+ for (auto G=M.getGlobalList().begin(); G!=M.getGlobalList().end(); G++) {
+
+ GlobalVariable &GV=*G;
+ if (!GV.getName().str().empty()) {
+
+ fprintf(stderr, "Global Variable: %s", GV.getName().str().c_str());
+ if (GV.hasInitializer())
+ if (auto *Val = dyn_cast<ConstantDataArray>(GV.getInitializer()))
+ fprintf(stderr, " Value: \"%s\"", Val->getAsString().str().c_str());
+ fprintf(stderr, "\n");
+
+ }
+
+ }
+
+ */
+
+ scanForDangerousFunctions(&M);
+
+ /* Instrument all the things! */
+
+ int inst_blocks = 0;
+
+ for (auto &F : M) {
+
+ /*For debugging
+ AttributeSet X = F.getAttributes().getFnAttributes();
+ fprintf(stderr, "DEBUG: Module %s Function %s attributes %u\n",
+ M.getName().str().c_str(), F.getName().str().c_str(),
+ X.getNumAttributes());
+ */
+
+ if (F.size() < function_minimum_size) continue;
+ if (isIgnoreFunction(&F)) continue;
+
+ // the instrument file list check
+ AttributeList Attrs = F.getAttributes();
+ if (Attrs.hasAttribute(-1, StringRef("skipinstrument"))) {
+
+ if (debug)
+ fprintf(stderr,
+ "DEBUG: Function %s is not in a source file that was specified "
+ "in the instrument file list\n",
+ F.getName().str().c_str());
+ continue;
+
+ }
+
+ std::vector<BasicBlock *> InsBlocks;
+
+ if (autodictionary) {
+
+ /* Some implementation notes.
+ *
+ * We try to handle 3 cases:
+ * - memcmp("foo", arg, 3) <- literal string
+ * - static char globalvar[] = "foo";
+ * memcmp(globalvar, arg, 3) <- global variable
+ * - char localvar[] = "foo";
+ * memcmp(locallvar, arg, 3) <- local variable
+ *
+ * The local variable case is the hardest. We can only detect that
+ * case if there is no reassignment or change in the variable.
+ * And it might not work across llvm version.
+ * What we do is hooking the initializer function for local variables
+ * (llvm.memcpy.p0i8.p0i8.i64) and note the string and the assigned
+ * variable. And if that variable is then used in a compare function
+ * we use that noted string.
+ * This seems not to work for tokens that have a size <= 4 :-(
+ *
+ * - if the compared length is smaller than the string length we
+ * save the full string. This is likely better for fuzzing but
+ * might be wrong in a few cases depending on optimizers
+ *
+ * - not using StringRef because there is a bug in the llvm 11
+ * checkout I am using which sometimes points to wrong strings
+ *
+ * Over and out. Took me a full day. damn. mh/vh
+ */
+
+ for (auto &BB : F) {
+
+ for (auto &IN : BB) {
+
+ CallInst *callInst = nullptr;
+ CmpInst * cmpInst = nullptr;
+
+ if ((cmpInst = dyn_cast<CmpInst>(&IN))) {
+
+ Value * op = cmpInst->getOperand(1);
+ ConstantInt *ilen = dyn_cast<ConstantInt>(op);
+
+ if (ilen && ilen->uge(0xffffffffffffffff) == false) {
+
+ u64 val2 = 0, val = ilen->getZExtValue();
+ u32 len = 0;
+ if (val > 0x10000 && val < 0xffffffff) len = 4;
+ if (val > 0x100000001 && val < 0xffffffffffffffff) len = 8;
+
+ if (len) {
+
+ auto c = cmpInst->getPredicate();
+
+ switch (c) {
+
+ case CmpInst::FCMP_OGT: // fall through
+ case CmpInst::FCMP_OLE: // fall through
+ case CmpInst::ICMP_SLE: // fall through
+ case CmpInst::ICMP_SGT:
+
+ // signed comparison and it is a negative constant
+ if ((len == 4 && (val & 80000000)) ||
+ (len == 8 && (val & 8000000000000000))) {
+
+ if ((val & 0xffff) != 1) val2 = val - 1;
+ break;
+
+ }
+
+ // fall through
+
+ case CmpInst::FCMP_UGT: // fall through
+ case CmpInst::FCMP_ULE: // fall through
+ case CmpInst::ICMP_UGT: // fall through
+ case CmpInst::ICMP_ULE:
+ if ((val & 0xffff) != 0xfffe) val2 = val + 1;
+ break;
+
+ case CmpInst::FCMP_OLT: // fall through
+ case CmpInst::FCMP_OGE: // fall through
+ case CmpInst::ICMP_SLT: // fall through
+ case CmpInst::ICMP_SGE:
+
+ // signed comparison and it is a negative constant
+ if ((len == 4 && (val & 80000000)) ||
+ (len == 8 && (val & 8000000000000000))) {
+
+ if ((val & 0xffff) != 1) val2 = val - 1;
+ break;
+
+ }
+
+ // fall through
+
+ case CmpInst::FCMP_ULT: // fall through
+ case CmpInst::FCMP_UGE: // fall through
+ case CmpInst::ICMP_ULT: // fall through
+ case CmpInst::ICMP_UGE:
+ if ((val & 0xffff) != 1) val2 = val - 1;
+ break;
+
+ default:
+ val2 = 0;
+
+ }
+
+ dictionary.push_back(std::string((char *)&val, len));
+ found++;
+
+ if (val2) {
+
+ dictionary.push_back(std::string((char *)&val2, len));
+ found++;
+
+ }
+
+ }
+
+ }
+
+ }
+
+ if ((callInst = dyn_cast<CallInst>(&IN))) {
+
+ bool isStrcmp = true;
+ bool isMemcmp = true;
+ bool isStrncmp = true;
+ bool isStrcasecmp = true;
+ bool isStrncasecmp = true;
+ bool isIntMemcpy = true;
+ bool isStdString = true;
+ bool addedNull = false;
+ size_t optLen = 0;
+
+ Function *Callee = callInst->getCalledFunction();
+ if (!Callee) continue;
+ if (callInst->getCallingConv() != llvm::CallingConv::C) continue;
+ std::string FuncName = Callee->getName().str();
+ isStrcmp &= !FuncName.compare("strcmp");
+ isMemcmp &=
+ (!FuncName.compare("memcmp") || !FuncName.compare("bcmp"));
+ isStrncmp &= !FuncName.compare("strncmp");
+ isStrcasecmp &= !FuncName.compare("strcasecmp");
+ isStrncasecmp &= !FuncName.compare("strncasecmp");
+ isIntMemcpy &= !FuncName.compare("llvm.memcpy.p0i8.p0i8.i64");
+ isStdString &=
+ ((FuncName.find("basic_string") != std::string::npos &&
+ FuncName.find("compare") != std::string::npos) ||
+ (FuncName.find("basic_string") != std::string::npos &&
+ FuncName.find("find") != std::string::npos));
+
+ /* we do something different here, putting this BB and the
+ successors in a block map */
+ if (!FuncName.compare("__afl_persistent_loop")) {
+
+ BlockList.push_back(&BB);
+ /*
+ for (succ_iterator SI = succ_begin(&BB), SE =
+ succ_end(&BB); SI != SE; ++SI) {
+
+ BasicBlock *succ = *SI;
+ BlockList.push_back(succ);
+
+ }
+
+ */
+
+ }
+
+ if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp &&
+ !isStrncasecmp && !isIntMemcpy && !isStdString)
+ continue;
+
+ /* Verify the strcmp/memcmp/strncmp/strcasecmp/strncasecmp function
+ * prototype */
+ FunctionType *FT = Callee->getFunctionType();
+
+ isStrcmp &= FT->getNumParams() == 2 &&
+ FT->getReturnType()->isIntegerTy(32) &&
+ FT->getParamType(0) == FT->getParamType(1) &&
+ FT->getParamType(0) ==
+ IntegerType::getInt8PtrTy(M.getContext());
+ isStrcasecmp &= FT->getNumParams() == 2 &&
+ FT->getReturnType()->isIntegerTy(32) &&
+ FT->getParamType(0) == FT->getParamType(1) &&
+ FT->getParamType(0) ==
+ IntegerType::getInt8PtrTy(M.getContext());
+ isMemcmp &= FT->getNumParams() == 3 &&
+ FT->getReturnType()->isIntegerTy(32) &&
+ FT->getParamType(0)->isPointerTy() &&
+ FT->getParamType(1)->isPointerTy() &&
+ FT->getParamType(2)->isIntegerTy();
+ isStrncmp &= FT->getNumParams() == 3 &&
+ FT->getReturnType()->isIntegerTy(32) &&
+ FT->getParamType(0) == FT->getParamType(1) &&
+ FT->getParamType(0) ==
+ IntegerType::getInt8PtrTy(M.getContext()) &&
+ FT->getParamType(2)->isIntegerTy();
+ isStrncasecmp &= FT->getNumParams() == 3 &&
+ FT->getReturnType()->isIntegerTy(32) &&
+ FT->getParamType(0) == FT->getParamType(1) &&
+ FT->getParamType(0) ==
+ IntegerType::getInt8PtrTy(M.getContext()) &&
+ FT->getParamType(2)->isIntegerTy();
+ isStdString &= FT->getNumParams() >= 2 &&
+ FT->getParamType(0)->isPointerTy() &&
+ FT->getParamType(1)->isPointerTy();
+
+ if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp &&
+ !isStrncasecmp && !isIntMemcpy && !isStdString)
+ continue;
+
+ /* is a str{n,}{case,}cmp/memcmp, check if we have
+ * str{case,}cmp(x, "const") or str{case,}cmp("const", x)
+ * strn{case,}cmp(x, "const", ..) or strn{case,}cmp("const", x, ..)
+ * memcmp(x, "const", ..) or memcmp("const", x, ..) */
+ Value *Str1P = callInst->getArgOperand(0),
+ *Str2P = callInst->getArgOperand(1);
+ std::string Str1, Str2;
+ StringRef TmpStr;
+ bool HasStr1 = getConstantStringInfo(Str1P, TmpStr);
+ if (TmpStr.empty()) {
+
+ HasStr1 = false;
+
+ } else {
+
+ HasStr1 = true;
+ Str1 = TmpStr.str();
+
+ }
+
+ bool HasStr2 = getConstantStringInfo(Str2P, TmpStr);
+ if (TmpStr.empty()) {
+
+ HasStr2 = false;
+
+ } else {
+
+ HasStr2 = true;
+ Str2 = TmpStr.str();
+
+ }
+
+ if (debug)
+ fprintf(stderr, "F:%s %p(%s)->\"%s\"(%s) %p(%s)->\"%s\"(%s)\n",
+ FuncName.c_str(), Str1P, Str1P->getName().str().c_str(),
+ Str1.c_str(), HasStr1 == true ? "true" : "false", Str2P,
+ Str2P->getName().str().c_str(), Str2.c_str(),
+ HasStr2 == true ? "true" : "false");
+
+ // we handle the 2nd parameter first because of llvm memcpy
+ if (!HasStr2) {
+
+ auto *Ptr = dyn_cast<ConstantExpr>(Str2P);
+ if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) {
+
+ if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) {
+
+ if (Var->hasInitializer()) {
+
+ if (auto *Array = dyn_cast<ConstantDataArray>(
+ Var->getInitializer())) {
+
+ HasStr2 = true;
+ Str2 = Array->getAsString().str();
+
+ }
+
+ }
+
+ }
+
+ }
+
+ }
+
+ // for the internal memcpy routine we only care for the second
+ // parameter and are not reporting anything.
+ if (isIntMemcpy == true) {
+
+ if (HasStr2 == true) {
+
+ Value * op2 = callInst->getArgOperand(2);
+ ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
+ if (ilen) {
+
+ uint64_t literalLength = Str2.size();
+ uint64_t optLength = ilen->getZExtValue();
+ if (literalLength + 1 == optLength) {
+
+ Str2.append("\0", 1); // add null byte
+ addedNull = true;
+
+ }
+
+ }
+
+ valueMap[Str1P] = new std::string(Str2);
+
+ if (debug)
+ fprintf(stderr, "Saved: %s for %p\n", Str2.c_str(), Str1P);
+ continue;
+
+ }
+
+ continue;
+
+ }
+
+ // Neither a literal nor a global variable?
+ // maybe it is a local variable that we saved
+ if (!HasStr2) {
+
+ std::string *strng = valueMap[Str2P];
+ if (strng && !strng->empty()) {
+
+ Str2 = *strng;
+ HasStr2 = true;
+ if (debug)
+ fprintf(stderr, "Filled2: %s for %p\n", strng->c_str(),
+ Str2P);
+
+ }
+
+ }
+
+ if (!HasStr1) {
+
+ auto Ptr = dyn_cast<ConstantExpr>(Str1P);
+
+ if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) {
+
+ if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) {
+
+ if (Var->hasInitializer()) {
+
+ if (auto *Array = dyn_cast<ConstantDataArray>(
+ Var->getInitializer())) {
+
+ HasStr1 = true;
+ Str1 = Array->getAsString().str();
+
+ }
+
+ }
+
+ }
+
+ }
+
+ }
+
+ // Neither a literal nor a global variable?
+ // maybe it is a local variable that we saved
+ if (!HasStr1) {
+
+ std::string *strng = valueMap[Str1P];
+ if (strng && !strng->empty()) {
+
+ Str1 = *strng;
+ HasStr1 = true;
+ if (debug)
+ fprintf(stderr, "Filled1: %s for %p\n", strng->c_str(),
+ Str1P);
+
+ }
+
+ }
+
+ /* handle cases of one string is const, one string is variable */
+ if (!(HasStr1 ^ HasStr2)) continue;
+
+ std::string thestring;
+
+ if (HasStr1)
+ thestring = Str1;
+ else
+ thestring = Str2;
+
+ optLen = thestring.length();
+
+ if (isMemcmp || isStrncmp || isStrncasecmp) {
+
+ Value * op2 = callInst->getArgOperand(2);
+ ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
+ if (ilen) {
+
+ uint64_t literalLength = optLen;
+ optLen = ilen->getZExtValue();
+ if (literalLength + 1 == optLen) { // add null byte
+ thestring.append("\0", 1);
+ addedNull = true;
+
+ }
+
+ }
+
+ }
+
+ // add null byte if this is a string compare function and a null
+ // was not already added
+ if (!isMemcmp) {
+
+ if (addedNull == false) {
+
+ thestring.append("\0", 1); // add null byte
+ optLen++;
+
+ }
+
+ // ensure we do not have garbage
+ size_t offset = thestring.find('\0', 0);
+ if (offset + 1 < optLen) optLen = offset + 1;
+ thestring = thestring.substr(0, optLen);
+
+ }
+
+ if (!be_quiet) {
+
+ std::string outstring;
+ fprintf(stderr, "%s: length %zu/%zu \"", FuncName.c_str(), optLen,
+ thestring.length());
+ for (uint8_t i = 0; i < thestring.length(); i++) {
+
+ uint8_t c = thestring[i];
+ if (c <= 32 || c >= 127)
+ fprintf(stderr, "\\x%02x", c);
+ else
+ fprintf(stderr, "%c", c);
+
+ }
+
+ fprintf(stderr, "\"\n");
+
+ }
+
+ // we take the longer string, even if the compare was to a
+ // shorter part. Note that depending on the optimizer of the
+ // compiler this can be wrong, but it is more likely that this
+ // is helping the fuzzer
+ if (optLen != thestring.length()) optLen = thestring.length();
+ if (optLen > MAX_AUTO_EXTRA) optLen = MAX_AUTO_EXTRA;
+ if (optLen < MIN_AUTO_EXTRA) // too short? skip
+ continue;
+
+ dictionary.push_back(thestring.substr(0, optLen));
+
+ }
+
+ }
+
+ }
+
+ }
+
+ for (auto &BB : F) {
+
+ if (F.size() == 1) {
+
+ InsBlocks.push_back(&BB);
+ continue;
+
+ }
+
+ uint32_t succ = 0;
+ for (succ_iterator SI = succ_begin(&BB), SE = succ_end(&BB); SI != SE;
+ ++SI)
+ if ((*SI)->size() > 0) succ++;
+ if (succ < 2) // no need to instrument
+ continue;
+
+ if (BlockList.size()) {
+
+ int skip = 0;
+ for (uint32_t k = 0; k < BlockList.size(); k++) {
+
+ if (&BB == BlockList[k]) {
+
+ if (debug)
+ fprintf(stderr,
+ "DEBUG: Function %s skipping BB with/after __afl_loop\n",
+ F.getName().str().c_str());
+ skip = 1;
+
+ }
+
+ }
+
+ if (skip) continue;
+
+ }
+
+ InsBlocks.push_back(&BB);
+
+ }
+
+ if (InsBlocks.size() > 0) {
+
+ uint32_t i = InsBlocks.size();
+
+ do {
+
+ --i;
+ BasicBlock * newBB = NULL;
+ BasicBlock * origBB = &(*InsBlocks[i]);
+ std::vector<BasicBlock *> Successors;
+ Instruction * TI = origBB->getTerminator();
+ uint32_t fs = origBB->getParent()->size();
+ uint32_t countto;
+
+ for (succ_iterator SI = succ_begin(origBB), SE = succ_end(origBB);
+ SI != SE; ++SI) {
+
+ BasicBlock *succ = *SI;
+ Successors.push_back(succ);
+
+ }
+
+ if (fs == 1) {
+
+ newBB = origBB;
+ countto = 1;
+
+ } else {
+
+ if (TI == NULL || TI->getNumSuccessors() < 2) continue;
+ countto = Successors.size();
+
+ }
+
+ // if (Successors.size() != TI->getNumSuccessors())
+ // FATAL("Different successor numbers %lu <-> %u\n", Successors.size(),
+ // TI->getNumSuccessors());
+
+ for (uint32_t j = 0; j < countto; j++) {
+
+ if (fs != 1) newBB = llvm::SplitEdge(origBB, Successors[j]);
+
+ if (!newBB) {
+
+ if (!be_quiet) WARNF("Split failed!");
+ continue;
+
+ }
+
+ if (documentFile) {
+
+ fprintf(documentFile, "ModuleID=%llu Function=%s edgeID=%u\n",
+ moduleID, F.getName().str().c_str(), afl_global_id);
+
+ }
+
+ BasicBlock::iterator IP = newBB->getFirstInsertionPt();
+ IRBuilder<> IRB(&(*IP));
+
+ /* Set the ID of the inserted basic block */
+
+ ConstantInt *CurLoc = ConstantInt::get(Int32Ty, afl_global_id++);
+
+ /* Load SHM pointer */
+
+ Value *MapPtrIdx;
+
+ if (map_addr) {
+
+ MapPtrIdx = IRB.CreateGEP(MapPtrFixed, CurLoc);
+
+ } else {
+
+ LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr);
+ MapPtr->setMetadata(M.getMDKindID("nosanitize"),
+ MDNode::get(C, None));
+ MapPtrIdx = IRB.CreateGEP(MapPtr, CurLoc);
+
+ }
+
+ /* Update bitmap */
+
+ LoadInst *Counter = IRB.CreateLoad(MapPtrIdx);
+ Counter->setMetadata(M.getMDKindID("nosanitize"),
+ MDNode::get(C, None));
+
+ Value *Incr = IRB.CreateAdd(Counter, One);
+
+ if (skip_nozero == NULL) {
+
+ auto cf = IRB.CreateICmpEQ(Incr, Zero);
+ auto carry = IRB.CreateZExt(cf, Int8Ty);
+ Incr = IRB.CreateAdd(Incr, carry);
+
+ }
+
+ IRB.CreateStore(Incr, MapPtrIdx)
+ ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
+
+ // done :)
+
+ inst_blocks++;
+
+ }
+
+ } while (i > 0);
+
+ }
+
+ }
+
+ if (documentFile) fclose(documentFile);
+ documentFile = NULL;
+
+ // save highest location ID to global variable
+ // do this after each function to fail faster
+ if (!be_quiet && afl_global_id > MAP_SIZE &&
+ afl_global_id > FS_OPT_MAX_MAPSIZE) {
+
+ uint32_t pow2map = 1, map = afl_global_id;
+ while ((map = map >> 1))
+ pow2map++;
+ WARNF(
+ "We have %u blocks to instrument but the map size is only %u. Either "
+ "edit config.h and set MAP_SIZE_POW2 from %u to %u, then recompile "
+ "afl-fuzz and llvm_mode and then make this target - or set "
+ "AFL_MAP_SIZE with at least size %u when running afl-fuzz with this "
+ "target.",
+ afl_global_id, MAP_SIZE, MAP_SIZE_POW2, pow2map, afl_global_id);
+
+ }
+
+ if (!getenv("AFL_LLVM_LTO_DONTWRITEID") || dictionary.size() || map_addr) {
+
+ // yes we could create our own function, insert it into ctors ...
+ // but this would be a pain in the butt ... so we use afl-llvm-rt-lto.o
+
+ Function *f = M.getFunction("__afl_auto_init_globals");
+
+ if (!f) {
+
+ fprintf(stderr,
+ "Error: init function could not be found (this should not "
+ "happen)\n");
+ exit(-1);
+
+ }
+
+ BasicBlock *bb = &f->getEntryBlock();
+ if (!bb) {
+
+ fprintf(stderr,
+ "Error: init function does not have an EntryBlock (this should "
+ "not happen)\n");
+ exit(-1);
+
+ }
+
+ BasicBlock::iterator IP = bb->getFirstInsertionPt();
+ IRBuilder<> IRB(&(*IP));
+
+ if (map_addr) {
+
+ GlobalVariable *AFLMapAddrFixed = new GlobalVariable(
+ M, Int64Ty, true, GlobalValue::ExternalLinkage, 0, "__afl_map_addr");
+ ConstantInt *MapAddr = ConstantInt::get(Int64Ty, map_addr);
+ StoreInst * StoreMapAddr = IRB.CreateStore(MapAddr, AFLMapAddrFixed);
+ StoreMapAddr->setMetadata(M.getMDKindID("nosanitize"),
+ MDNode::get(C, None));
+
+ }
+
+ if (getenv("AFL_LLVM_LTO_DONTWRITEID") == NULL) {
+
+ uint32_t write_loc = afl_global_id;
+
+ if (afl_global_id % 8) write_loc = (((afl_global_id + 8) >> 3) << 3);
+
+ GlobalVariable *AFLFinalLoc = new GlobalVariable(
+ M, Int32Ty, true, GlobalValue::ExternalLinkage, 0, "__afl_final_loc");
+ ConstantInt *const_loc = ConstantInt::get(Int32Ty, write_loc);
+ StoreInst * StoreFinalLoc = IRB.CreateStore(const_loc, AFLFinalLoc);
+ StoreFinalLoc->setMetadata(M.getMDKindID("nosanitize"),
+ MDNode::get(C, None));
+
+ }
+
+ if (dictionary.size()) {
+
+ size_t memlen = 0, count = 0, offset = 0;
+ char * ptr;
+
+ // sort and unique the dictionary
+ std::sort(dictionary.begin(), dictionary.end());
+ auto last = std::unique(dictionary.begin(), dictionary.end());
+ dictionary.erase(last, dictionary.end());
+
+ for (auto token : dictionary) {
+
+ memlen += token.length();
+ count++;
+
+ }
+
+ if (!be_quiet)
+ printf("AUTODICTIONARY: %lu string%s found\n", count,
+ count == 1 ? "" : "s");
+
+ if (count) {
+
+ if ((ptr = (char *)malloc(memlen + count)) == NULL) {
+
+ fprintf(stderr, "Error: malloc for %lu bytes failed!\n",
+ memlen + count);
+ exit(-1);
+
+ }
+
+ count = 0;
+
+ for (auto token : dictionary) {
+
+ if (offset + token.length() < 0xfffff0 && count < MAX_AUTO_EXTRAS) {
+
+ ptr[offset++] = (uint8_t)token.length();
+ memcpy(ptr + offset, token.c_str(), token.length());
+ offset += token.length();
+ count++;
+
+ }
+
+ }
+
+ GlobalVariable *AFLDictionaryLen =
+ new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,
+ 0, "__afl_dictionary_len");
+ ConstantInt *const_len = ConstantInt::get(Int32Ty, offset);
+ StoreInst *StoreDictLen = IRB.CreateStore(const_len, AFLDictionaryLen);
+ StoreDictLen->setMetadata(M.getMDKindID("nosanitize"),
+ MDNode::get(C, None));
+
+ ArrayType *ArrayTy = ArrayType::get(IntegerType::get(C, 8), offset);
+ GlobalVariable *AFLInternalDictionary = new GlobalVariable(
+ M, ArrayTy, true, GlobalValue::ExternalLinkage,
+ ConstantDataArray::get(C,
+ *(new ArrayRef<char>((char *)ptr, offset))),
+ "__afl_internal_dictionary");
+ AFLInternalDictionary->setInitializer(ConstantDataArray::get(
+ C, *(new ArrayRef<char>((char *)ptr, offset))));
+ AFLInternalDictionary->setConstant(true);
+
+ GlobalVariable *AFLDictionary = new GlobalVariable(
+ M, PointerType::get(Int8Ty, 0), false, GlobalValue::ExternalLinkage,
+ 0, "__afl_dictionary");
+
+ Value *AFLDictOff = IRB.CreateGEP(AFLInternalDictionary, Zero);
+ Value *AFLDictPtr =
+ IRB.CreatePointerCast(AFLDictOff, PointerType::get(Int8Ty, 0));
+ StoreInst *StoreDict = IRB.CreateStore(AFLDictPtr, AFLDictionary);
+ StoreDict->setMetadata(M.getMDKindID("nosanitize"),
+ MDNode::get(C, None));
+
+ }
+
+ }
+
+ }
+
+ /* Say something nice. */
+
+ if (!be_quiet) {
+
+ if (!inst_blocks)
+ WARNF("No instrumentation targets found.");
+ else {
+
+ char modeline[100];
+ snprintf(modeline, sizeof(modeline), "%s%s%s%s%s",
+ getenv("AFL_HARDEN") ? "hardened" : "non-hardened",
+ getenv("AFL_USE_ASAN") ? ", ASAN" : "",
+ getenv("AFL_USE_MSAN") ? ", MSAN" : "",
+ getenv("AFL_USE_CFISAN") ? ", CFISAN" : "",
+ getenv("AFL_USE_UBSAN") ? ", UBSAN" : "");
+ OKF("Instrumented %u locations with no collisions (on average %llu "
+ "collisions would be in afl-gcc/afl-clang-fast) (%s mode).",
+ inst_blocks, calculateCollisions(inst_blocks), modeline);
+
+ }
+
+ }
+
+ return true;
+
+}
+
/* Pass registry identifier; the address (not the value) is the unique key. */
char AFLLTOPass::ID = 0;

/* Extension-point hook: appends the LTO pass to a legacy pass manager. */
static void registerAFLLTOPass(const PassManagerBuilder &,
                               legacy::PassManagerBase &PM) {

  PM.add(new AFLLTOPass());

}

/* Expose the pass as "-afl-lto" on opt's command line. */
static RegisterPass<AFLLTOPass> X("afl-lto", "afl++ LTO instrumentation pass",
                                  false, false);

/* Auto-run at the very end of full LTO, when the whole program is visible. */
static RegisterStandardPasses RegisterAFLLTOPass(
    PassManagerBuilder::EP_FullLinkTimeOptimizationLast, registerAFLLTOPass);
+
diff --git a/instrumentation/afl-llvm-lto-instrumentlist.so.cc b/instrumentation/afl-llvm-lto-instrumentlist.so.cc
new file mode 100644
index 00000000..a7331444
--- /dev/null
+++ b/instrumentation/afl-llvm-lto-instrumentlist.so.cc
@@ -0,0 +1,147 @@
+/*
+ american fuzzy lop++ - LLVM-mode instrumentation pass
+ ---------------------------------------------------
+
+ Written by Laszlo Szekeres <lszekeres@google.com> and
+ Michal Zalewski
+
+ LLVM integration design comes from Laszlo Szekeres. C bits copied-and-pasted
+ from afl-as.c are Michal's fault.
+
+ Copyright 2015, 2016 Google Inc. All rights reserved.
+ Copyright 2019-2020 AFLplusplus Project. All rights reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at:
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ This library is plugged into LLVM when invoking clang through afl-clang-fast.
+ It tells the compiler to add code roughly equivalent to the bits discussed
+ in ../afl-as.h.
+
+ */
+
+#define AFL_LLVM_PASS
+
+#include "config.h"
+#include "debug.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <list>
+#include <string>
+#include <fstream>
+#include <sys/time.h>
+#include <fnmatch.h>
+
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/IR/CFG.h"
+
+#include "afl-llvm-common.h"
+
+using namespace llvm;
+
+namespace {
+
/* Legacy-PM module pass that runs early and tags every function that is
   NOT on the user's instrument list with the "skipinstrument" attribute,
   which later afl++ passes check before instrumenting. */
class AFLcheckIfInstrument : public ModulePass {

 public:
  static char ID;  // pass registry identifier
  AFLcheckIfInstrument() : ModulePass(ID) {

    // AFL_DEBUG enables verbose diagnostics in all afl++ passes
    if (getenv("AFL_DEBUG")) debug = 1;

    // loads AFL_LLVM_ALLOWLIST/DENYLIST style instrument lists
    initInstrumentList();

  }

  bool runOnModule(Module &M) override;

  // StringRef getPassName() const override {

  //  return "American Fuzzy Lop Instrumentation";
  // }

 protected:
  std::list<std::string> myInstrumentList;  // NOTE(review): appears unused here

};
+
+} // namespace
+
/* Pass registry identifier (its address is used as the unique key). */
char AFLcheckIfInstrument::ID = 0;
+
+bool AFLcheckIfInstrument::runOnModule(Module &M) {
+
+ /* Show a banner */
+
+ setvbuf(stdout, NULL, _IONBF, 0);
+
+ if ((isatty(2) && !getenv("AFL_QUIET")) || getenv("AFL_DEBUG") != NULL) {
+
+ SAYF(cCYA "afl-llvm-lto-instrumentlist" VERSION cRST
+ " by Marc \"vanHauser\" Heuse <mh@mh-sec.de>\n");
+
+ } else if (getenv("AFL_QUIET"))
+
+ be_quiet = 1;
+
+ for (auto &F : M) {
+
+ if (F.size() < 1) continue;
+
+ // fprintf(stderr, "F:%s\n", F.getName().str().c_str());
+
+ if (isInInstrumentList(&F)) {
+
+ if (debug)
+ SAYF(cMGN "[D] " cRST "function %s is in the instrument file list\n",
+ F.getName().str().c_str());
+
+ } else {
+
+ if (debug)
+ SAYF(cMGN "[D] " cRST
+ "function %s is NOT in the instrument file list\n",
+ F.getName().str().c_str());
+
+ auto & Ctx = F.getContext();
+ AttributeList Attrs = F.getAttributes();
+ AttrBuilder NewAttrs;
+ NewAttrs.addAttribute("skipinstrument");
+ F.setAttributes(
+ Attrs.addAttributes(Ctx, AttributeList::FunctionIndex, NewAttrs));
+
+ }
+
+ }
+
+ return true;
+
+}
+
/* Extension-point hook: appends the list-checking pass to a legacy pass
   manager. */
static void registerAFLcheckIfInstrumentpass(const PassManagerBuilder &,
                                             legacy::PassManagerBase &PM) {

  PM.add(new AFLcheckIfInstrument());

}

/* Run early in the module optimizer, before the instrumentation passes ... */
static RegisterStandardPasses RegisterAFLcheckIfInstrumentpass(
    PassManagerBuilder::EP_ModuleOptimizerEarly,
    registerAFLcheckIfInstrumentpass);

/* ... and also when compiling with -O0. */
static RegisterStandardPasses RegisterAFLcheckIfInstrumentpass0(
    PassManagerBuilder::EP_EnabledOnOptLevel0,
    registerAFLcheckIfInstrumentpass);
+
diff --git a/instrumentation/afl-llvm-pass.so.cc b/instrumentation/afl-llvm-pass.so.cc
new file mode 100644
index 00000000..8c8c987a
--- /dev/null
+++ b/instrumentation/afl-llvm-pass.so.cc
@@ -0,0 +1,654 @@
+/*
+ american fuzzy lop++ - LLVM-mode instrumentation pass
+ ---------------------------------------------------
+
+ Written by Laszlo Szekeres <lszekeres@google.com>,
+ Adrian Herrera <adrian.herrera@anu.edu.au>,
+ Michal Zalewski
+
+ LLVM integration design comes from Laszlo Szekeres. C bits copied-and-pasted
+ from afl-as.c are Michal's fault.
+
+ NGRAM previous location coverage comes from Adrian Herrera.
+
+ Copyright 2015, 2016 Google Inc. All rights reserved.
+ Copyright 2019-2020 AFLplusplus Project. All rights reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at:
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ This library is plugged into LLVM when invoking clang through afl-clang-fast.
+ It tells the compiler to add code roughly equivalent to the bits discussed
+ in ../afl-as.h.
+
+ */
+
+#define AFL_LLVM_PASS
+
+#include "config.h"
+#include "debug.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <list>
+#include <string>
+#include <fstream>
+#include <sys/time.h>
+
+#include "llvm/Config/llvm-config.h"
+#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 5
+typedef long double max_align_t;
+#endif
+
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+
+#if LLVM_VERSION_MAJOR > 3 || \
+ (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4)
+ #include "llvm/IR/DebugInfo.h"
+ #include "llvm/IR/CFG.h"
+#else
+ #include "llvm/DebugInfo.h"
+ #include "llvm/Support/CFG.h"
+#endif
+
+#include "afl-llvm-common.h"
+#include "llvm-ngram-coverage.h"
+
+using namespace llvm;
+
+namespace {
+
/* Legacy-pass-manager module pass that inserts the AFL coverage
   instrumentation; the actual work happens in runOnModule() below. */
class AFLCoverage : public ModulePass {

  public:
  static char ID;  // pass identification, replacement for typeid
  AFLCoverage() : ModulePass(ID) {

    // set up the allow/deny instrumentation filter from the environment
    initInstrumentList();

  }

  bool runOnModule(Module &M) override;

  protected:
  uint32_t ngram_size = 0;             // NGRAM mode size (0 = disabled)
  uint32_t map_size = MAP_SIZE;        // coverage map size used for AFL_R()
  uint32_t function_minimum_size = 1;  // skip functions with fewer blocks
  // ctx_str: set from AFL_LLVM_CTX; skip_nozero: set from
  // AFL_LLVM_SKIP_NEVERZERO (both read in runOnModule)
  char *   ctx_str = NULL, *skip_nozero = NULL;

};
+
+} // namespace
+
+char AFLCoverage::ID = 0;
+
+/* needed up to 3.9.0 */
+#if LLVM_VERSION_MAJOR == 3 && \
+ (LLVM_VERSION_MINOR < 9 || \
+ (LLVM_VERSION_MINOR == 9 && LLVM_VERSION_PATCH < 1))
uint64_t PowerOf2Ceil(unsigned in) {

  /* Fallback for LLVM < 3.9.1: round `in` up to the next power of two by
     smearing the top set bit of (in - 1) into every lower position and then
     adding one.  Note the subtraction wraps in 32-bit unsigned arithmetic,
     so an input of 0 yields 0xFFFFFFFF and the function returns 2^32. */

  uint64_t v = in - 1;

  for (unsigned shift = 1; shift <= 32; shift <<= 1)
    v |= v >> shift;

  return v + 1;

}
+
+#endif
+
+/* #if LLVM_VERSION_STRING >= "4.0.1" */
+#if LLVM_VERSION_MAJOR > 4 || \
+ (LLVM_VERSION_MAJOR == 4 && LLVM_VERSION_PATCH >= 1)
+ #define AFL_HAVE_VECTOR_INTRINSICS 1
+#endif
bool AFLCoverage::runOnModule(Module &M) {

  /* Pass entry point: inserts the classic AFL edge-coverage update
     (map[prev_loc ^ cur_loc]++) into each selected basic block, with
     optional NGRAM history and CTX (calling-context) modes. */

  LLVMContext &C = M.getContext();

  IntegerType *Int8Ty = IntegerType::getInt8Ty(C);
  IntegerType *Int32Ty = IntegerType::getInt32Ty(C);
#ifdef AFL_HAVE_VECTOR_INTRINSICS
  IntegerType *IntLocTy =
      IntegerType::getIntNTy(C, sizeof(PREV_LOC_T) * CHAR_BIT);
#endif
  struct timeval  tv;
  struct timezone tz;
  u32             rand_seed;
  unsigned int    cur_loc = 0;

  /* Setup random() so we get Actually Random(TM) outputs from AFL_R() */
  gettimeofday(&tv, &tz);
  rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid();
  AFL_SR(rand_seed);

  /* Show a banner */

  setvbuf(stdout, NULL, _IONBF, 0);

  if (getenv("AFL_DEBUG")) debug = 1;

  if ((isatty(2) && !getenv("AFL_QUIET")) || getenv("AFL_DEBUG") != NULL) {

    SAYF(cCYA "afl-llvm-pass" VERSION cRST
              " by <lszekeres@google.com> and <adrian.herrera@anu.edu.au>\n");

  } else

    be_quiet = 1;

  /*
    char *ptr;
    if ((ptr = getenv("AFL_MAP_SIZE")) || (ptr = getenv("AFL_MAPSIZE"))) {

      map_size = atoi(ptr);
      if (map_size < 8 || map_size > (1 << 29))
        FATAL("illegal AFL_MAP_SIZE %u, must be between 2^3 and 2^30",
        map_size); if (map_size % 8) map_size = (((map_size >> 3) + 1) << 3);

    }

  */

  /* Decide instrumentation ratio */

  char *       inst_ratio_str = getenv("AFL_INST_RATIO");
  unsigned int inst_ratio = 100;

  if (inst_ratio_str) {

    if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || !inst_ratio ||
        inst_ratio > 100)
      FATAL("Bad value of AFL_INST_RATIO (must be between 1 and 100)");

  }

#if LLVM_VERSION_MAJOR < 9
  char *neverZero_counters_str = getenv("AFL_LLVM_NOT_ZERO");
#endif
  skip_nozero = getenv("AFL_LLVM_SKIP_NEVERZERO");

  unsigned PrevLocSize = 0;

  char *ngram_size_str = getenv("AFL_LLVM_NGRAM_SIZE");
  if (!ngram_size_str) ngram_size_str = getenv("AFL_NGRAM_SIZE");
  ctx_str = getenv("AFL_LLVM_CTX");

#ifdef AFL_HAVE_VECTOR_INTRINSICS
  /* Decide previous location vector size (must be a power of two) */
  VectorType *PrevLocTy = NULL;

  if (ngram_size_str)
    if (sscanf(ngram_size_str, "%u", &ngram_size) != 1 || ngram_size < 2 ||
        ngram_size > NGRAM_SIZE_MAX)
      FATAL(
          "Bad value of AFL_NGRAM_SIZE (must be between 2 and NGRAM_SIZE_MAX "
          "(%u))",
          NGRAM_SIZE_MAX);

  if (ngram_size == 1) ngram_size = 0;
  if (ngram_size)
    PrevLocSize = ngram_size - 1;
  else
#else
  if (ngram_size_str)
  #ifndef LLVM_VERSION_PATCH
    FATAL(
        "Sorry, NGRAM branch coverage is not supported with llvm version "
        "%d.%d.%d!",
        LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, 0);
  #else
    FATAL(
        "Sorry, NGRAM branch coverage is not supported with llvm version "
        "%d.%d.%d!",
        LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, LLVM_VERSION_PATCH);
  #endif
#endif
  /* careful: in the vector-intrinsics build this statement is the body of
     the dangling `else` above; otherwise it executes unconditionally */
  PrevLocSize = 1;

#ifdef AFL_HAVE_VECTOR_INTRINSICS
  int PrevLocVecSize = PowerOf2Ceil(PrevLocSize);
  if (ngram_size)
    PrevLocTy = VectorType::get(IntLocTy, PrevLocVecSize
  #if LLVM_VERSION_MAJOR >= 12
                                ,
                                false
  #endif
    );
#endif

  /* Get globals for the SHM region and the previous location. Note that
     __afl_prev_loc is thread-local. */

  GlobalVariable *AFLMapPtr =
      new GlobalVariable(M, PointerType::get(Int8Ty, 0), false,
                         GlobalValue::ExternalLinkage, 0, "__afl_area_ptr");
  GlobalVariable *AFLPrevLoc;
  GlobalVariable *AFLContext = NULL;

  // Android's TLS handling differs, hence the non-TLS variant there
  if (ctx_str)
#ifdef __ANDROID__
    AFLContext = new GlobalVariable(
        M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_ctx");
#else
    AFLContext = new GlobalVariable(
        M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_ctx", 0,
        GlobalVariable::GeneralDynamicTLSModel, 0, false);
#endif

#ifdef AFL_HAVE_VECTOR_INTRINSICS
  if (ngram_size)
  #ifdef __ANDROID__
    AFLPrevLoc = new GlobalVariable(
        M, PrevLocTy, /* isConstant */ false, GlobalValue::ExternalLinkage,
        /* Initializer */ nullptr, "__afl_prev_loc");
  #else
    AFLPrevLoc = new GlobalVariable(
        M, PrevLocTy, /* isConstant */ false, GlobalValue::ExternalLinkage,
        /* Initializer */ nullptr, "__afl_prev_loc",
        /* InsertBefore */ nullptr, GlobalVariable::GeneralDynamicTLSModel,
        /* AddressSpace */ 0, /* IsExternallyInitialized */ false);
  #endif
  else
#endif
#ifdef __ANDROID__
    AFLPrevLoc = new GlobalVariable(
        M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_loc");
#else
    AFLPrevLoc = new GlobalVariable(
        M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_loc", 0,
        GlobalVariable::GeneralDynamicTLSModel, 0, false);
#endif

#ifdef AFL_HAVE_VECTOR_INTRINSICS
  /* Create the vector shuffle mask for updating the previous block history.
     Note that the first element of the vector will store cur_loc, so just set
     it to undef to allow the optimizer to do its thing. */

  SmallVector<Constant *, 32> PrevLocShuffle = {UndefValue::get(Int32Ty)};

  for (unsigned I = 0; I < PrevLocSize - 1; ++I)
    PrevLocShuffle.push_back(ConstantInt::get(Int32Ty, I));

  for (int I = PrevLocSize; I < PrevLocVecSize; ++I)
    PrevLocShuffle.push_back(ConstantInt::get(Int32Ty, PrevLocSize));

  Constant *PrevLocShuffleMask = ConstantVector::get(PrevLocShuffle);
#endif

  // other constants we need
  ConstantInt *Zero = ConstantInt::get(Int8Ty, 0);
  ConstantInt *One = ConstantInt::get(Int8Ty, 1);

  LoadInst *PrevCtx = NULL;  // CTX sensitive coverage

  /* Instrument all the things! */

  int inst_blocks = 0;
  scanForDangerousFunctions(&M);

  for (auto &F : M) {

    int has_calls = 0;
    if (debug)
      fprintf(stderr, "FUNCTION: %s (%zu)\n", F.getName().str().c_str(),
              F.size());

    // honor the allow/deny list and the minimum function size
    if (!isInInstrumentList(&F)) continue;

    if (F.size() < function_minimum_size) continue;

    for (auto &BB : F) {

      BasicBlock::iterator IP = BB.getFirstInsertionPt();
      IRBuilder<>          IRB(&(*IP));

      // Context sensitive coverage
      if (ctx_str && &BB == &F.getEntryBlock()) {

        // load the context ID of the previous function and write it to a
        // local variable on the stack
        PrevCtx = IRB.CreateLoad(AFLContext);
        PrevCtx->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));

        // does the function have calls? and is any of the calls larger than
        // one basic block?
        for (auto &BB : F) {

          if (has_calls) break;
          for (auto &IN : BB) {

            CallInst *callInst = nullptr;
            if ((callInst = dyn_cast<CallInst>(&IN))) {

              Function *Callee = callInst->getCalledFunction();
              if (!Callee || Callee->size() < function_minimum_size)
                continue;
              else {

                has_calls = 1;
                break;

              }

            }

          }

        }

        // if yes we store a context ID for this function in the global var
        if (has_calls) {

          ConstantInt *NewCtx = ConstantInt::get(Int32Ty, AFL_R(map_size));
          StoreInst *  StoreCtx = IRB.CreateStore(NewCtx, AFLContext);
          StoreCtx->setMetadata(M.getMDKindID("nosanitize"),
                                MDNode::get(C, None));

        }

      }

      // AFL_INST_RATIO: probabilistically skip this block
      if (AFL_R(100) >= inst_ratio) continue;

      /* Make up cur_loc */

      // cur_loc++;
      cur_loc = AFL_R(map_size);

/* There is a problem with Ubuntu 18.04 and llvm 6.0 (see issue #63).
   The inline function successors() is not inlined and also not found at runtime
   :-( As I am unable to detect Ubuntu 18.04 here, the next best thing is to
   disable this optional optimization for LLVM 6.0.0 and Linux */
#if !(LLVM_VERSION_MAJOR == 6 && LLVM_VERSION_MINOR == 0) || !defined __linux__
      // only instrument if this basic block is the destination of a previous
      // basic block that has multiple successors
      // this gets rid of ~5-10% of instrumentations that are unnecessary
      // result: a little more speed and less map pollution
      int more_than_one = -1;
      // fprintf(stderr, "BB %u: ", cur_loc);
      for (pred_iterator PI = pred_begin(&BB), E = pred_end(&BB); PI != E;
           ++PI) {

        BasicBlock *Pred = *PI;

        int count = 0;
        if (more_than_one == -1) more_than_one = 0;
        // fprintf(stderr, " %p=>", Pred);

        for (succ_iterator SI = succ_begin(Pred), E = succ_end(Pred); SI != E;
             ++SI) {

          BasicBlock *Succ = *SI;

          // if (count > 0)
          //   fprintf(stderr, "|");
          if (Succ != NULL) count++;
          // fprintf(stderr, "%p", Succ);

        }

        if (count > 1) more_than_one = 1;

      }

      // fprintf(stderr, " == %d\n", more_than_one);
      if (F.size() > 1 && more_than_one != 1) {

        // in CTX mode we have to restore the original context for the caller -
        // she might be calling other functions which need the correct CTX
        if (ctx_str && has_calls) {

          Instruction *Inst = BB.getTerminator();
          if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst)) {

            IRBuilder<> Post_IRB(Inst);
            StoreInst * RestoreCtx = Post_IRB.CreateStore(PrevCtx, AFLContext);
            RestoreCtx->setMetadata(M.getMDKindID("nosanitize"),
                                    MDNode::get(C, None));

          }

        }

        continue;

      }

#endif

      ConstantInt *CurLoc;

#ifdef AFL_HAVE_VECTOR_INTRINSICS
      if (ngram_size)
        CurLoc = ConstantInt::get(IntLocTy, cur_loc);
      else
#endif
        CurLoc = ConstantInt::get(Int32Ty, cur_loc);

      /* Load prev_loc */

      LoadInst *PrevLoc = IRB.CreateLoad(AFLPrevLoc);
      PrevLoc->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
      Value *PrevLocTrans;

#ifdef AFL_HAVE_VECTOR_INTRINSICS
      /* "For efficiency, we propose to hash the tuple as a key into the
         hit_count map as (prev_block_trans << 1) ^ curr_block_trans, where
         prev_block_trans = (block_trans_1 ^ ... ^ block_trans_(n-1)" */

      if (ngram_size)
        PrevLocTrans =
            IRB.CreateZExt(IRB.CreateXorReduce(PrevLoc), IRB.getInt32Ty());
      else
#endif
        PrevLocTrans = PrevLoc;

      // in CTX mode the previous location is additionally XORed with the
      // caller's context ID
      if (ctx_str)
        PrevLocTrans =
            IRB.CreateZExt(IRB.CreateXor(PrevLocTrans, PrevCtx), Int32Ty);
      else
        PrevLocTrans = IRB.CreateZExt(PrevLocTrans, IRB.getInt32Ty());

      /* Load SHM pointer */

      LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr);
      MapPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));

      Value *MapPtrIdx;
#ifdef AFL_HAVE_VECTOR_INTRINSICS
      if (ngram_size)
        MapPtrIdx = IRB.CreateGEP(
            MapPtr,
            IRB.CreateZExt(
                IRB.CreateXor(PrevLocTrans, IRB.CreateZExt(CurLoc, Int32Ty)),
                Int32Ty));
      else
#endif
        MapPtrIdx = IRB.CreateGEP(MapPtr, IRB.CreateXor(PrevLocTrans, CurLoc));

      /* Update bitmap */

      LoadInst *Counter = IRB.CreateLoad(MapPtrIdx);
      Counter->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));

      Value *Incr = IRB.CreateAdd(Counter, One);

#if LLVM_VERSION_MAJOR < 9
      if (neverZero_counters_str !=
          NULL) {  // with llvm 9 we make this the default as the bug in llvm
                   // is then fixed
#else
      if (!skip_nozero) {

#endif
        /* hexcoder: Realize a counter that skips zero during overflow.
         * Once this counter reaches its maximum value, it next increments to 1
         *
         * Instead of
         * Counter + 1 -> Counter
         * we inject now this
         * Counter + 1 -> {Counter, OverflowFlag}
         * Counter + OverflowFlag -> Counter
         */

        auto cf = IRB.CreateICmpEQ(Incr, Zero);
        auto carry = IRB.CreateZExt(cf, Int8Ty);
        Incr = IRB.CreateAdd(Incr, carry);

      }

      IRB.CreateStore(Incr, MapPtrIdx)
          ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));

      /* Update prev_loc history vector (by placing cur_loc at the head of the
         vector and shuffle the other elements back by one) */

      StoreInst *Store;

#ifdef AFL_HAVE_VECTOR_INTRINSICS
      if (ngram_size) {

        Value *ShuffledPrevLoc = IRB.CreateShuffleVector(
            PrevLoc, UndefValue::get(PrevLocTy), PrevLocShuffleMask);
        Value *UpdatedPrevLoc = IRB.CreateInsertElement(
            ShuffledPrevLoc, IRB.CreateLShr(CurLoc, (uint64_t)1), (uint64_t)0);

        Store = IRB.CreateStore(UpdatedPrevLoc, AFLPrevLoc);
        Store->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));

      } else

#endif
      {

        // NOTE(review): unlike the ngram path above, this store carries no
        // "nosanitize" metadata - looks like an oversight; confirm upstream
        Store = IRB.CreateStore(ConstantInt::get(Int32Ty, cur_loc >> 1),
                                AFLPrevLoc);

      }

      // in CTX mode we have to restore the original context for the caller -
      // she might be calling other functions which need the correct CTX.
      // Currently this is only needed for the Ubuntu clang-6.0 bug
      if (ctx_str && has_calls) {

        Instruction *Inst = BB.getTerminator();
        if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst)) {

          IRBuilder<> Post_IRB(Inst);
          StoreInst * RestoreCtx = Post_IRB.CreateStore(PrevCtx, AFLContext);
          RestoreCtx->setMetadata(M.getMDKindID("nosanitize"),
                                  MDNode::get(C, None));

        }

      }

      inst_blocks++;

    }

  }

  /*
    // This is currently disabled because we not only need to create/insert a
    // function (easy), but also add it as a constructor with an ID < 5

    if (getenv("AFL_LLVM_DONTWRITEID") == NULL) {

      // yes we could create our own function, insert it into ctors ...
      // but this would be a pain in the butt ... so we use afl-llvm-rt.o

      Function *f = ...

      if (!f) {

        fprintf(stderr,
                "Error: init function could not be created (this should not
        happen)\n"); exit(-1);

      }

      ... constructor for f = 4

      BasicBlock *bb = &f->getEntryBlock();
      if (!bb) {

        fprintf(stderr,
                "Error: init function does not have an EntryBlock (this should
        not happen)\n"); exit(-1);

      }

      BasicBlock::iterator IP = bb->getFirstInsertionPt();
      IRBuilder<> IRB(&(*IP));

      if (map_size <= 0x800000) {

        GlobalVariable *AFLFinalLoc = new GlobalVariable(
            M, Int32Ty, true, GlobalValue::ExternalLinkage, 0,
            "__afl_final_loc");
        ConstantInt *const_loc = ConstantInt::get(Int32Ty, map_size);
        StoreInst * StoreFinalLoc = IRB.CreateStore(const_loc, AFLFinalLoc);
        StoreFinalLoc->setMetadata(M.getMDKindID("nosanitize"),
                                   MDNode::get(C, None));

      }

    }

  */

  /* Say something nice. */

  if (!be_quiet) {

    if (!inst_blocks)
      WARNF("No instrumentation targets found.");
    else {

      char modeline[100];
      snprintf(modeline, sizeof(modeline), "%s%s%s%s%s",
               getenv("AFL_HARDEN") ? "hardened" : "non-hardened",
               getenv("AFL_USE_ASAN") ? ", ASAN" : "",
               getenv("AFL_USE_MSAN") ? ", MSAN" : "",
               getenv("AFL_USE_CFISAN") ? ", CFISAN" : "",
               getenv("AFL_USE_UBSAN") ? ", UBSAN" : "");
      OKF("Instrumented %u locations (%s mode, ratio %u%%).", inst_blocks,
          modeline, inst_ratio);

    }

  }

  return true;

}
+
+static void registerAFLPass(const PassManagerBuilder &,
+ legacy::PassManagerBase &PM) {
+
+ PM.add(new AFLCoverage());
+
+}
+
/* Hook the pass into the legacy pipeline: at the end of the optimizer for
   optimized builds ... */
static RegisterStandardPasses RegisterAFLPass(
    PassManagerBuilder::EP_OptimizerLast, registerAFLPass);

/* ... and separately for -O0 builds, which bypass the optimizer pipeline. */
static RegisterStandardPasses RegisterAFLPass0(
    PassManagerBuilder::EP_EnabledOnOptLevel0, registerAFLPass);
+
diff --git a/instrumentation/afl-llvm-rt-lto.o.c b/instrumentation/afl-llvm-rt-lto.o.c
new file mode 100644
index 00000000..e53785ff
--- /dev/null
+++ b/instrumentation/afl-llvm-rt-lto.o.c
@@ -0,0 +1,27 @@
+/*
+ american fuzzy lop++ - LLVM instrumentation bootstrap
+ -----------------------------------------------------
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at:
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+
// to prevent the function from being removed
unsigned char __afl_lto_mode = 0;

/* Proper initialization routine: registered as a constructor with priority 0
   so it runs before other initializers and flags that the LTO runtime is
   linked in. */

__attribute__((constructor(0))) void __afl_auto_init_globals(void) {

  char *dbg_env = getenv("AFL_DEBUG");
  if (dbg_env != NULL) fprintf(stderr, "[__afl_auto_init_globals]\n");

  __afl_lto_mode = 1;

}
+
diff --git a/instrumentation/cmplog-instructions-pass.cc b/instrumentation/cmplog-instructions-pass.cc
new file mode 100644
index 00000000..9921de0c
--- /dev/null
+++ b/instrumentation/cmplog-instructions-pass.cc
@@ -0,0 +1,292 @@
+/*
+ american fuzzy lop++ - LLVM CmpLog instrumentation
+ --------------------------------------------------
+
+ Written by Andrea Fioraldi <andreafioraldi@gmail.com>
+
+ Copyright 2015, 2016 Google Inc. All rights reserved.
+ Copyright 2019-2020 AFLplusplus Project. All rights reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at:
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <list>
+#include <string>
+#include <fstream>
+#include <sys/time.h>
+#include "llvm/Config/llvm-config.h"
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ValueTracking.h"
+
+#if LLVM_VERSION_MAJOR > 3 || \
+ (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4)
+ #include "llvm/IR/Verifier.h"
+ #include "llvm/IR/DebugInfo.h"
+#else
+ #include "llvm/Analysis/Verifier.h"
+ #include "llvm/DebugInfo.h"
+ #define nullptr 0
+#endif
+
+#include <set>
+#include "afl-llvm-common.h"
+
+using namespace llvm;
+
+namespace {
+
/* Module pass that instruments integer compare instructions with calls to
   the cmplog runtime hooks; the work is done in hookInstrs(). */
class CmpLogInstructions : public ModulePass {

  public:
  static char ID;  // pass identification, replacement for typeid
  CmpLogInstructions() : ModulePass(ID) {

    // set up the allow/deny instrumentation filter from the environment
    initInstrumentList();

  }

  bool runOnModule(Module &M) override;

  // getPassName() returned const char* before LLVM 4, StringRef afterwards
#if LLVM_VERSION_MAJOR < 4
  const char *getPassName() const override {

#else
  StringRef getPassName() const override {

#endif
    return "cmplog instructions";

  }

  private:
  // inserts the hook calls; returns false if nothing was instrumented
  bool hookInstrs(Module &M);

};
+
+} // namespace
+
+char CmpLogInstructions::ID = 0;
+
bool CmpLogInstructions::hookInstrs(Module &M) {

  /* Collect every eligible integer compare in the module, then insert a call
     to the matching __cmplog_ins_hook{1,2,4,8} runtime function right before
     it, passing both operands, so the fuzzer can observe compared values.
     Returns false when no compare was instrumented. */

  std::vector<Instruction *> icomps;
  LLVMContext &              C = M.getContext();

  Type *       VoidTy = Type::getVoidTy(C);
  IntegerType *Int8Ty = IntegerType::getInt8Ty(C);
  IntegerType *Int16Ty = IntegerType::getInt16Ty(C);
  IntegerType *Int32Ty = IntegerType::getInt32Ty(C);
  IntegerType *Int64Ty = IntegerType::getInt64Ty(C);

  /* Declare the four runtime hooks, one per operand width.  The declaration
     differs across LLVM versions: < 9 returns a Constant*, >= 9 a
     FunctionCallee; < 5 requires a NULL vararg terminator. */

#if LLVM_VERSION_MAJOR < 9
  Constant *
#else
  FunctionCallee
#endif
      c1 = M.getOrInsertFunction("__cmplog_ins_hook1", VoidTy, Int8Ty, Int8Ty
#if LLVM_VERSION_MAJOR < 5
                                 ,
                                 NULL
#endif
      );
#if LLVM_VERSION_MAJOR < 9
  Function *cmplogHookIns1 = cast<Function>(c1);
#else
  FunctionCallee cmplogHookIns1 = c1;
#endif

#if LLVM_VERSION_MAJOR < 9
  Constant *
#else
  FunctionCallee
#endif
      c2 = M.getOrInsertFunction("__cmplog_ins_hook2", VoidTy, Int16Ty, Int16Ty
#if LLVM_VERSION_MAJOR < 5
                                 ,
                                 NULL
#endif
      );
#if LLVM_VERSION_MAJOR < 9
  Function *cmplogHookIns2 = cast<Function>(c2);
#else
  FunctionCallee cmplogHookIns2 = c2;
#endif

#if LLVM_VERSION_MAJOR < 9
  Constant *
#else
  FunctionCallee
#endif
      c4 = M.getOrInsertFunction("__cmplog_ins_hook4", VoidTy, Int32Ty, Int32Ty
#if LLVM_VERSION_MAJOR < 5
                                 ,
                                 NULL
#endif
      );
#if LLVM_VERSION_MAJOR < 9
  Function *cmplogHookIns4 = cast<Function>(c4);
#else
  FunctionCallee cmplogHookIns4 = c4;
#endif

#if LLVM_VERSION_MAJOR < 9
  Constant *
#else
  FunctionCallee
#endif
      c8 = M.getOrInsertFunction("__cmplog_ins_hook8", VoidTy, Int64Ty, Int64Ty
#if LLVM_VERSION_MAJOR < 5
                                 ,
                                 NULL
#endif
      );
#if LLVM_VERSION_MAJOR < 9
  Function *cmplogHookIns8 = cast<Function>(c8);
#else
  FunctionCallee cmplogHookIns8 = c8;
#endif

  /* iterate over all functions, bbs and instructions and add suitable calls */
  for (auto &F : M) {

    if (!isInInstrumentList(&F)) continue;

    for (auto &BB : F) {

      for (auto &IN : BB) {

        CmpInst *selectcmpInst = nullptr;

        if ((selectcmpInst = dyn_cast<CmpInst>(&IN))) {

          // only the integer (ICMP_*) predicates are of interest; floating
          // point compares are not collected
          if (selectcmpInst->getPredicate() == CmpInst::ICMP_EQ ||
              selectcmpInst->getPredicate() == CmpInst::ICMP_NE ||
              selectcmpInst->getPredicate() == CmpInst::ICMP_UGT ||
              selectcmpInst->getPredicate() == CmpInst::ICMP_SGT ||
              selectcmpInst->getPredicate() == CmpInst::ICMP_ULT ||
              selectcmpInst->getPredicate() == CmpInst::ICMP_SLT ||
              selectcmpInst->getPredicate() == CmpInst::ICMP_UGE ||
              selectcmpInst->getPredicate() == CmpInst::ICMP_SGE ||
              selectcmpInst->getPredicate() == CmpInst::ICMP_ULE ||
              selectcmpInst->getPredicate() == CmpInst::ICMP_SLE) {

            auto op0 = selectcmpInst->getOperand(0);
            auto op1 = selectcmpInst->getOperand(1);

            IntegerType *intTyOp0 = dyn_cast<IntegerType>(op0->getType());
            IntegerType *intTyOp1 = dyn_cast<IntegerType>(op1->getType());

            /* this is probably not needed but we do it anyway */
            if (!intTyOp0 || !intTyOp1) { continue; }

            icomps.push_back(selectcmpInst);

          }

        }

      }

    }

  }

  if (!icomps.size()) return false;
  // if (!be_quiet) errs() << "Hooking " << icomps.size() << " cmp
  // instructions\n";

  for (auto &selectcmpInst : icomps) {

    IRBuilder<> IRB(selectcmpInst->getParent());
    IRB.SetInsertPoint(selectcmpInst);

    auto op0 = selectcmpInst->getOperand(0);
    auto op1 = selectcmpInst->getOperand(1);

    IntegerType *intTyOp0 = dyn_cast<IntegerType>(op0->getType());
    IntegerType *intTyOp1 = dyn_cast<IntegerType>(op1->getType());

    // presumably both operands share a width; taking the max is defensive
    unsigned max_size = intTyOp0->getBitWidth() > intTyOp1->getBitWidth()
                            ? intTyOp0->getBitWidth()
                            : intTyOp1->getBitWidth();

    std::vector<Value *> args;
    args.push_back(op0);
    args.push_back(op1);

    // dispatch on operand width; any other width (e.g. i1, i128) is
    // silently not hooked (the default branch)
    switch (max_size) {

      case 8:
        IRB.CreateCall(cmplogHookIns1, args);
        break;
      case 16:
        IRB.CreateCall(cmplogHookIns2, args);
        break;
      case 32:
        IRB.CreateCall(cmplogHookIns4, args);
        break;
      case 64:
        IRB.CreateCall(cmplogHookIns8, args);
        break;
      default:
        break;

    }

  }

  return true;

}
+
+bool CmpLogInstructions::runOnModule(Module &M) {
+
+ if (getenv("AFL_QUIET") == NULL)
+ printf("Running cmplog-instructions-pass by andreafioraldi@gmail.com\n");
+ else
+ be_quiet = 1;
+ hookInstrs(M);
+ verifyModule(M);
+
+ return true;
+
+}
+
+static void registerCmpLogInstructionsPass(const PassManagerBuilder &,
+ legacy::PassManagerBase &PM) {
+
+ auto p = new CmpLogInstructions();
+ PM.add(p);
+
+}
+
/* Register at the end of the optimizer pipeline for optimized builds ... */
static RegisterStandardPasses RegisterCmpLogInstructionsPass(
    PassManagerBuilder::EP_OptimizerLast, registerCmpLogInstructionsPass);

/* ... and for -O0 builds, which bypass the optimizer pipeline. */
static RegisterStandardPasses RegisterCmpLogInstructionsPass0(
    PassManagerBuilder::EP_EnabledOnOptLevel0, registerCmpLogInstructionsPass);

/* LLVM 11+ exposes a full-LTO extension point; register there as well so
   the pass also runs in LTO builds. */
#if LLVM_VERSION_MAJOR >= 11
static RegisterStandardPasses RegisterCmpLogInstructionsPassLTO(
    PassManagerBuilder::EP_FullLinkTimeOptimizationLast,
    registerCmpLogInstructionsPass);
#endif
+
diff --git a/instrumentation/cmplog-routines-pass.cc b/instrumentation/cmplog-routines-pass.cc
new file mode 100644
index 00000000..e92883ae
--- /dev/null
+++ b/instrumentation/cmplog-routines-pass.cc
@@ -0,0 +1,213 @@
+/*
+ american fuzzy lop++ - LLVM CmpLog instrumentation
+ --------------------------------------------------
+
+ Written by Andrea Fioraldi <andreafioraldi@gmail.com>
+
+ Copyright 2015, 2016 Google Inc. All rights reserved.
+ Copyright 2019-2020 AFLplusplus Project. All rights reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at:
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <list>
+#include <string>
+#include <fstream>
+#include <sys/time.h>
+#include "llvm/Config/llvm-config.h"
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ValueTracking.h"
+
+#if LLVM_VERSION_MAJOR > 3 || \
+ (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4)
+ #include "llvm/IR/Verifier.h"
+ #include "llvm/IR/DebugInfo.h"
+#else
+ #include "llvm/Analysis/Verifier.h"
+ #include "llvm/DebugInfo.h"
+ #define nullptr 0
+#endif
+
+#include <set>
+#include "afl-llvm-common.h"
+
+using namespace llvm;
+
+namespace {
+
/* Module pass that instruments calls to strcmp-like routines (two pointer
   arguments of the same type) with the cmplog routine hook; the work is
   done in hookRtns(). */
class CmpLogRoutines : public ModulePass {

  public:
  static char ID;  // pass identification, replacement for typeid
  CmpLogRoutines() : ModulePass(ID) {

    // set up the allow/deny instrumentation filter from the environment
    initInstrumentList();

  }

  bool runOnModule(Module &M) override;

  // getPassName() returned const char* before LLVM 4, StringRef afterwards
#if LLVM_VERSION_MAJOR < 4
  const char *getPassName() const override {

#else
  StringRef getPassName() const override {

#endif
    return "cmplog routines";

  }

  private:
  // inserts the hook calls; returns false if nothing was instrumented
  bool hookRtns(Module &M);

};
+
+} // namespace
+
+char CmpLogRoutines::ID = 0;
+
bool CmpLogRoutines::hookRtns(Module &M) {

  /* Collect direct C-calling-convention calls whose first two parameters are
     pointers of the same type (a heuristic for strcmp/memcmp-style routines)
     and insert a __cmplog_rtn_hook(ptr, ptr) call before each so the fuzzer
     can observe the compared buffers.  Returns false when nothing matched. */

  std::vector<CallInst *> calls;
  LLVMContext &           C = M.getContext();

  Type *VoidTy = Type::getVoidTy(C);
  // PointerType *VoidPtrTy = PointerType::get(VoidTy, 0);
  IntegerType *Int8Ty = IntegerType::getInt8Ty(C);
  PointerType *i8PtrTy = PointerType::get(Int8Ty, 0);

  /* Declare the runtime hook; the declaration differs across LLVM versions:
     < 9 returns a Constant*, >= 9 a FunctionCallee; < 5 requires a NULL
     vararg terminator. */

#if LLVM_VERSION_MAJOR < 9
  Constant *
#else
  FunctionCallee
#endif
      c = M.getOrInsertFunction("__cmplog_rtn_hook", VoidTy, i8PtrTy, i8PtrTy
#if LLVM_VERSION_MAJOR < 5
                                ,
                                NULL
#endif
      );
#if LLVM_VERSION_MAJOR < 9
  Function *cmplogHookFn = cast<Function>(c);
#else
  FunctionCallee cmplogHookFn = c;
#endif

  /* iterate over all functions, bbs and instructions and add suitable calls */
  for (auto &F : M) {

    if (!isInInstrumentList(&F)) continue;

    for (auto &BB : F) {

      for (auto &IN : BB) {

        CallInst *callInst = nullptr;

        if ((callInst = dyn_cast<CallInst>(&IN))) {

          // indirect calls have no static callee and are skipped
          Function *Callee = callInst->getCalledFunction();
          if (!Callee) continue;
          if (callInst->getCallingConv() != llvm::CallingConv::C) continue;

          FunctionType *FT = Callee->getFunctionType();

          // heuristic: >= 2 params, non-void return, and the first two
          // parameters are pointers of the same type
          bool isPtrRtn = FT->getNumParams() >= 2 &&
                          !FT->getReturnType()->isVoidTy() &&
                          FT->getParamType(0) == FT->getParamType(1) &&
                          FT->getParamType(0)->isPointerTy();

          if (!isPtrRtn) continue;

          calls.push_back(callInst);

        }

      }

    }

  }

  if (!calls.size()) return false;
  /*
    if (!be_quiet)
      errs() << "Hooking " << calls.size()
             << " calls with pointers as arguments\n";
  */

  for (auto &callInst : calls) {

    Value *v1P = callInst->getArgOperand(0), *v2P = callInst->getArgOperand(1);

    IRBuilder<> IRB(callInst->getParent());
    IRB.SetInsertPoint(callInst);

    // the hook takes i8*, so cast both pointer arguments down
    std::vector<Value *> args;
    Value *              v1Pcasted = IRB.CreatePointerCast(v1P, i8PtrTy);
    Value *              v2Pcasted = IRB.CreatePointerCast(v2P, i8PtrTy);
    args.push_back(v1Pcasted);
    args.push_back(v2Pcasted);

    IRB.CreateCall(cmplogHookFn, args);

    // errs() << callInst->getCalledFunction()->getName() << "\n";

  }

  return true;

}
+
+bool CmpLogRoutines::runOnModule(Module &M) {
+
+ if (getenv("AFL_QUIET") == NULL)
+ printf("Running cmplog-routines-pass by andreafioraldi@gmail.com\n");
+ else
+ be_quiet = 1;
+ hookRtns(M);
+ verifyModule(M);
+
+ return true;
+
+}
+
+static void registerCmpLogRoutinesPass(const PassManagerBuilder &,
+ legacy::PassManagerBase &PM) {
+
+ auto p = new CmpLogRoutines();
+ PM.add(p);
+
+}
+
/* Register at the end of the optimizer pipeline for optimized builds ... */
static RegisterStandardPasses RegisterCmpLogRoutinesPass(
    PassManagerBuilder::EP_OptimizerLast, registerCmpLogRoutinesPass);

/* ... and for -O0 builds, which bypass the optimizer pipeline. */
static RegisterStandardPasses RegisterCmpLogRoutinesPass0(
    PassManagerBuilder::EP_EnabledOnOptLevel0, registerCmpLogRoutinesPass);

/* LLVM 11+ exposes a full-LTO extension point; register there as well so
   the pass also runs in LTO builds. */
#if LLVM_VERSION_MAJOR >= 11
static RegisterStandardPasses RegisterCmpLogRoutinesPassLTO(
    PassManagerBuilder::EP_FullLinkTimeOptimizationLast,
    registerCmpLogRoutinesPass);
#endif
+
diff --git a/instrumentation/compare-transform-pass.so.cc b/instrumentation/compare-transform-pass.so.cc
new file mode 100644
index 00000000..de8b97f0
--- /dev/null
+++ b/instrumentation/compare-transform-pass.so.cc
@@ -0,0 +1,594 @@
+/*
+ * Copyright 2016 laf-intel
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <list>
+#include <string>
+#include <fstream>
+#include <sys/time.h>
+#include "llvm/Config/llvm-config.h"
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ValueTracking.h"
+
+#if LLVM_VERSION_MAJOR > 3 || \
+ (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4)
+ #include "llvm/IR/Verifier.h"
+ #include "llvm/IR/DebugInfo.h"
+#else
+ #include "llvm/Analysis/Verifier.h"
+ #include "llvm/DebugInfo.h"
+ #define nullptr 0
+#endif
+
+#include <set>
+#include "afl-llvm-common.h"
+
+using namespace llvm;
+
+namespace {
+
/* Legacy module pass (laf-intel) that replaces calls to
 * str{n,}{case,}cmp/memcmp with one constant argument by an unrolled
 * byte-wise comparison, so coverage feedback can solve them one byte at a
 * time. */
class CompareTransform : public ModulePass {

 public:
  static char ID;
  CompareTransform() : ModulePass(ID) {

    // honour AFL_LLVM_ALLOWLIST/DENYLIST function filtering
    initInstrumentList();

  }

  bool runOnModule(Module &M) override;

#if LLVM_VERSION_MAJOR < 4
  const char *getPassName() const override {

#else
  StringRef getPassName() const override {

#endif
    return "transforms compare functions";

  }

 private:
  // each process* flag enables rewriting of the corresponding libc function
  bool transformCmps(Module &M, const bool processStrcmp,
                     const bool processMemcmp, const bool processStrncmp,
                     const bool processStrcasecmp,
                     const bool processStrncasecmp);

};
+
+} // namespace
+
/* pass identity token for LLVM's registry -- the address matters, not the value */
char CompareTransform::ID = 0;
+
+bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
+ const bool processMemcmp,
+ const bool processStrncmp,
+ const bool processStrcasecmp,
+ const bool processStrncasecmp) {
+
+ DenseMap<Value *, std::string *> valueMap;
+ std::vector<CallInst *> calls;
+ LLVMContext & C = M.getContext();
+ IntegerType * Int8Ty = IntegerType::getInt8Ty(C);
+ IntegerType * Int32Ty = IntegerType::getInt32Ty(C);
+ IntegerType * Int64Ty = IntegerType::getInt64Ty(C);
+
+#if LLVM_VERSION_MAJOR < 9
+ Constant *
+#else
+ FunctionCallee
+#endif
+ c = M.getOrInsertFunction("tolower", Int32Ty, Int32Ty
+#if LLVM_VERSION_MAJOR < 5
+ ,
+ NULL
+#endif
+ );
+#if LLVM_VERSION_MAJOR < 9
+ Function *tolowerFn = cast<Function>(c);
+#else
+ FunctionCallee tolowerFn = c;
+#endif
+
+ /* iterate over all functions, bbs and instruction and add suitable calls to
+ * strcmp/memcmp/strncmp/strcasecmp/strncasecmp */
+ for (auto &F : M) {
+
+ if (!isInInstrumentList(&F)) continue;
+
+ for (auto &BB : F) {
+
+ for (auto &IN : BB) {
+
+ CallInst *callInst = nullptr;
+
+ if ((callInst = dyn_cast<CallInst>(&IN))) {
+
+ bool isStrcmp = processStrcmp;
+ bool isMemcmp = processMemcmp;
+ bool isStrncmp = processStrncmp;
+ bool isStrcasecmp = processStrcasecmp;
+ bool isStrncasecmp = processStrncasecmp;
+ bool isIntMemcpy = true;
+
+ Function *Callee = callInst->getCalledFunction();
+ if (!Callee) continue;
+ if (callInst->getCallingConv() != llvm::CallingConv::C) continue;
+ StringRef FuncName = Callee->getName();
+ isStrcmp &= !FuncName.compare(StringRef("strcmp"));
+ isMemcmp &= (!FuncName.compare(StringRef("memcmp")) ||
+ !FuncName.compare(StringRef("bcmp")));
+ isStrncmp &= !FuncName.compare(StringRef("strncmp"));
+ isStrcasecmp &= !FuncName.compare(StringRef("strcasecmp"));
+ isStrncasecmp &= !FuncName.compare(StringRef("strncasecmp"));
+ isIntMemcpy &= !FuncName.compare("llvm.memcpy.p0i8.p0i8.i64");
+
+ if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp &&
+ !isStrncasecmp && !isIntMemcpy)
+ continue;
+
+ /* Verify the strcmp/memcmp/strncmp/strcasecmp/strncasecmp function
+ * prototype */
+ FunctionType *FT = Callee->getFunctionType();
+
+ isStrcmp &=
+ FT->getNumParams() == 2 && FT->getReturnType()->isIntegerTy(32) &&
+ FT->getParamType(0) == FT->getParamType(1) &&
+ FT->getParamType(0) == IntegerType::getInt8PtrTy(M.getContext());
+ isStrcasecmp &=
+ FT->getNumParams() == 2 && FT->getReturnType()->isIntegerTy(32) &&
+ FT->getParamType(0) == FT->getParamType(1) &&
+ FT->getParamType(0) == IntegerType::getInt8PtrTy(M.getContext());
+ isMemcmp &= FT->getNumParams() == 3 &&
+ FT->getReturnType()->isIntegerTy(32) &&
+ FT->getParamType(0)->isPointerTy() &&
+ FT->getParamType(1)->isPointerTy() &&
+ FT->getParamType(2)->isIntegerTy();
+ isStrncmp &= FT->getNumParams() == 3 &&
+ FT->getReturnType()->isIntegerTy(32) &&
+ FT->getParamType(0) == FT->getParamType(1) &&
+ FT->getParamType(0) ==
+ IntegerType::getInt8PtrTy(M.getContext()) &&
+ FT->getParamType(2)->isIntegerTy();
+ isStrncasecmp &= FT->getNumParams() == 3 &&
+ FT->getReturnType()->isIntegerTy(32) &&
+ FT->getParamType(0) == FT->getParamType(1) &&
+ FT->getParamType(0) ==
+ IntegerType::getInt8PtrTy(M.getContext()) &&
+ FT->getParamType(2)->isIntegerTy();
+
+ if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp &&
+ !isStrncasecmp && !isIntMemcpy)
+ continue;
+
+ /* is a str{n,}{case,}cmp/memcmp, check if we have
+ * str{case,}cmp(x, "const") or str{case,}cmp("const", x)
+ * strn{case,}cmp(x, "const", ..) or strn{case,}cmp("const", x, ..)
+ * memcmp(x, "const", ..) or memcmp("const", x, ..) */
+ Value *Str1P = callInst->getArgOperand(0),
+ *Str2P = callInst->getArgOperand(1);
+ StringRef Str1, Str2;
+ bool HasStr1 = getConstantStringInfo(Str1P, Str1);
+ bool HasStr2 = getConstantStringInfo(Str2P, Str2);
+
+ if (isIntMemcpy && HasStr2) {
+
+ valueMap[Str1P] = new std::string(Str2.str());
+ // fprintf(stderr, "saved %s for %p\n", Str2.str().c_str(), Str1P);
+ continue;
+
+ }
+
+ // not literal? maybe global or local variable
+ if (!(HasStr1 || HasStr2)) {
+
+ auto *Ptr = dyn_cast<ConstantExpr>(Str2P);
+ if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) {
+
+ if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) {
+
+ if (Var->hasInitializer()) {
+
+ if (auto *Array =
+ dyn_cast<ConstantDataArray>(Var->getInitializer())) {
+
+ HasStr2 = true;
+ Str2 = Array->getAsString();
+ valueMap[Str2P] = new std::string(Str2.str());
+ fprintf(stderr, "glo2 %s\n", Str2.str().c_str());
+
+ }
+
+ }
+
+ }
+
+ }
+
+ if (!HasStr2) {
+
+ auto *Ptr = dyn_cast<ConstantExpr>(Str1P);
+ if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) {
+
+ if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) {
+
+ if (Var->hasInitializer()) {
+
+ if (auto *Array = dyn_cast<ConstantDataArray>(
+ Var->getInitializer())) {
+
+ HasStr1 = true;
+ Str1 = Array->getAsString();
+ valueMap[Str1P] = new std::string(Str1.str());
+ // fprintf(stderr, "glo1 %s\n", Str1.str().c_str());
+
+ }
+
+ }
+
+ }
+
+ }
+
+ } else if (isIntMemcpy) {
+
+ valueMap[Str1P] = new std::string(Str2.str());
+ // fprintf(stderr, "saved\n");
+
+ }
+
+ }
+
+ if (isIntMemcpy) continue;
+
+ if (!(HasStr1 || HasStr2)) {
+
+ // do we have a saved local variable initialization?
+ std::string *val = valueMap[Str1P];
+ if (val && !val->empty()) {
+
+ Str1 = StringRef(*val);
+ HasStr1 = true;
+ // fprintf(stderr, "loaded1 %s\n", Str1.str().c_str());
+
+ } else {
+
+ val = valueMap[Str2P];
+ if (val && !val->empty()) {
+
+ Str2 = StringRef(*val);
+ HasStr2 = true;
+ // fprintf(stderr, "loaded2 %s\n", Str2.str().c_str());
+
+ }
+
+ }
+
+ }
+
+ /* handle cases of one string is const, one string is variable */
+ if (!(HasStr1 || HasStr2)) continue;
+
+ if (isMemcmp || isStrncmp || isStrncasecmp) {
+
+ /* check if third operand is a constant integer
+ * strlen("constStr") and sizeof() are treated as constant */
+ Value * op2 = callInst->getArgOperand(2);
+ ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
+ if (ilen) {
+
+ uint64_t len = ilen->getZExtValue();
+ // if len is zero this is a pointless call but allow real
+ // implementation to worry about that
+ if (!len) continue;
+
+ if (isMemcmp) {
+
+ // if size of compare is larger than constant string this is
+ // likely a bug but allow real implementation to worry about
+ // that
+ uint64_t literalLength = HasStr1 ? Str1.size() : Str2.size();
+ if (literalLength + 1 < ilen->getZExtValue()) continue;
+
+ }
+
+ } else if (isMemcmp)
+
+ // this *may* supply a len greater than the constant string at
+ // runtime so similarly we don't want to have to handle that
+ continue;
+
+ }
+
+ calls.push_back(callInst);
+
+ }
+
+ }
+
+ }
+
+ }
+
+ if (!calls.size()) return false;
+ if (!be_quiet)
+ printf(
+ "Replacing %zu calls to strcmp/memcmp/strncmp/strcasecmp/strncasecmp\n",
+ calls.size());
+
+ for (auto &callInst : calls) {
+
+ Value *Str1P = callInst->getArgOperand(0),
+ *Str2P = callInst->getArgOperand(1);
+ StringRef Str1, Str2, ConstStr;
+ std::string TmpConstStr;
+ Value * VarStr;
+ bool HasStr1 = getConstantStringInfo(Str1P, Str1);
+ bool HasStr2 = getConstantStringInfo(Str2P, Str2);
+ uint64_t constStrLen, unrollLen, constSizedLen = 0;
+ bool isMemcmp =
+ !callInst->getCalledFunction()->getName().compare(StringRef("memcmp"));
+ bool isSizedcmp = isMemcmp ||
+ !callInst->getCalledFunction()->getName().compare(
+ StringRef("strncmp")) ||
+ !callInst->getCalledFunction()->getName().compare(
+ StringRef("strncasecmp"));
+ Value *sizedValue = isSizedcmp ? callInst->getArgOperand(2) : NULL;
+ bool isConstSized = sizedValue && isa<ConstantInt>(sizedValue);
+ bool isCaseInsensitive = !callInst->getCalledFunction()->getName().compare(
+ StringRef("strcasecmp")) ||
+ !callInst->getCalledFunction()->getName().compare(
+ StringRef("strncasecmp"));
+
+ if (!(HasStr1 || HasStr2)) {
+
+ // do we have a saved local or global variable initialization?
+ std::string *val = valueMap[Str1P];
+ if (val && !val->empty()) {
+
+ Str1 = StringRef(*val);
+ HasStr1 = true;
+
+ } else {
+
+ val = valueMap[Str2P];
+ if (val && !val->empty()) {
+
+ Str2 = StringRef(*val);
+ HasStr2 = true;
+
+ }
+
+ }
+
+ }
+
+ if (isConstSized) {
+
+ constSizedLen = dyn_cast<ConstantInt>(sizedValue)->getZExtValue();
+
+ }
+
+ if (HasStr1) {
+
+ TmpConstStr = Str1.str();
+ VarStr = Str2P;
+
+ } else {
+
+ TmpConstStr = Str2.str();
+ VarStr = Str1P;
+
+ }
+
+ // add null termination character implicit in c strings
+ TmpConstStr.append("\0", 1);
+
+ // in the unusual case the const str has embedded null
+ // characters, the string comparison functions should terminate
+ // at the first null
+ if (!isMemcmp)
+ TmpConstStr.assign(TmpConstStr, 0, TmpConstStr.find('\0') + 1);
+
+ constStrLen = TmpConstStr.length();
+ // prefer use of StringRef (in comparison to std::string a StringRef has
+ // built-in runtime bounds checking, which makes debugging easier)
+ ConstStr = StringRef(TmpConstStr);
+
+ if (isConstSized)
+ unrollLen = constSizedLen < constStrLen ? constSizedLen : constStrLen;
+ else
+ unrollLen = constStrLen;
+
+ /*
+ if (!be_quiet)
+ errs() << callInst->getCalledFunction()->getName() << ": unroll len "
+ << unrollLen
+ << ((isSizedcmp && !isConstSized) ? ", variable n" : "") << ":
+ "
+ << ConstStr << "\n";
+ */
+
+ /* split before the call instruction */
+ BasicBlock *bb = callInst->getParent();
+ BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(callInst));
+
+ BasicBlock *next_lenchk_bb = NULL;
+ if (isSizedcmp && !isConstSized) {
+
+ next_lenchk_bb =
+ BasicBlock::Create(C, "len_check", end_bb->getParent(), end_bb);
+ BranchInst::Create(end_bb, next_lenchk_bb);
+
+ }
+
+ BasicBlock *next_cmp_bb =
+ BasicBlock::Create(C, "cmp_added", end_bb->getParent(), end_bb);
+ BranchInst::Create(end_bb, next_cmp_bb);
+ PHINode *PN = PHINode::Create(
+ Int32Ty, (next_lenchk_bb ? 2 : 1) * unrollLen + 1, "cmp_phi");
+
+#if LLVM_VERSION_MAJOR < 8
+ TerminatorInst *term = bb->getTerminator();
+#else
+ Instruction *term = bb->getTerminator();
+#endif
+ BranchInst::Create(next_lenchk_bb ? next_lenchk_bb : next_cmp_bb, bb);
+ term->eraseFromParent();
+
+ for (uint64_t i = 0; i < unrollLen; i++) {
+
+ BasicBlock * cur_cmp_bb = next_cmp_bb, *cur_lenchk_bb = next_lenchk_bb;
+ unsigned char c;
+
+ if (cur_lenchk_bb) {
+
+ IRBuilder<> cur_lenchk_IRB(&*(cur_lenchk_bb->getFirstInsertionPt()));
+ Value * icmp = cur_lenchk_IRB.CreateICmpEQ(
+ sizedValue, ConstantInt::get(sizedValue->getType(), i));
+ cur_lenchk_IRB.CreateCondBr(icmp, end_bb, cur_cmp_bb);
+ cur_lenchk_bb->getTerminator()->eraseFromParent();
+
+ PN->addIncoming(ConstantInt::get(Int32Ty, 0), cur_lenchk_bb);
+
+ }
+
+ if (isCaseInsensitive)
+ c = (unsigned char)(tolower((int)ConstStr[i]) & 0xff);
+ else
+ c = (unsigned char)ConstStr[i];
+
+ IRBuilder<> cur_cmp_IRB(&*(cur_cmp_bb->getFirstInsertionPt()));
+
+ Value *v = ConstantInt::get(Int64Ty, i);
+ Value *ele = cur_cmp_IRB.CreateInBoundsGEP(VarStr, v, "empty");
+ Value *load = cur_cmp_IRB.CreateLoad(ele);
+
+ if (isCaseInsensitive) {
+
+ // load >= 'A' && load <= 'Z' ? load | 0x020 : load
+ load = cur_cmp_IRB.CreateZExt(load, Int32Ty);
+ std::vector<Value *> args;
+ args.push_back(load);
+ load = cur_cmp_IRB.CreateCall(tolowerFn, args);
+ load = cur_cmp_IRB.CreateTrunc(load, Int8Ty);
+
+ }
+
+ Value *isub;
+ if (HasStr1)
+ isub = cur_cmp_IRB.CreateSub(ConstantInt::get(Int8Ty, c), load);
+ else
+ isub = cur_cmp_IRB.CreateSub(load, ConstantInt::get(Int8Ty, c));
+
+ Value *sext = cur_cmp_IRB.CreateSExt(isub, Int32Ty);
+ PN->addIncoming(sext, cur_cmp_bb);
+
+ if (i < unrollLen - 1) {
+
+ if (cur_lenchk_bb) {
+
+ next_lenchk_bb =
+ BasicBlock::Create(C, "len_check", end_bb->getParent(), end_bb);
+ BranchInst::Create(end_bb, next_lenchk_bb);
+
+ }
+
+ next_cmp_bb =
+ BasicBlock::Create(C, "cmp_added", end_bb->getParent(), end_bb);
+ BranchInst::Create(end_bb, next_cmp_bb);
+
+ Value *icmp =
+ cur_cmp_IRB.CreateICmpEQ(isub, ConstantInt::get(Int8Ty, 0));
+ cur_cmp_IRB.CreateCondBr(
+ icmp, next_lenchk_bb ? next_lenchk_bb : next_cmp_bb, end_bb);
+ cur_cmp_bb->getTerminator()->eraseFromParent();
+
+ } else {
+
+ // IRB.CreateBr(end_bb);
+
+ }
+
+ // add offset to varstr
+ // create load
+ // create signed isub
+ // create icmp
+ // create jcc
+ // create next_bb
+
+ }
+
+ /* since the call is the first instruction of the bb it is safe to
+ * replace it with a phi instruction */
+ BasicBlock::iterator ii(callInst);
+ ReplaceInstWithInst(callInst->getParent()->getInstList(), ii, PN);
+
+ }
+
+ return true;
+
+}
+
+bool CompareTransform::runOnModule(Module &M) {
+
+ if ((isatty(2) && getenv("AFL_QUIET") == NULL) || getenv("AFL_DEBUG") != NULL)
+ printf(
+ "Running compare-transform-pass by laf.intel@gmail.com, extended by "
+ "heiko@hexco.de\n");
+ else
+ be_quiet = 1;
+
+ transformCmps(M, true, true, true, true, true);
+ verifyModule(M);
+
+ return true;
+
+}
+
+static void registerCompTransPass(const PassManagerBuilder &,
+ legacy::PassManagerBase &PM) {
+
+ auto p = new CompareTransform();
+ PM.add(p);
+
+}
+
/* Register the pass at the end of the optimizer pipeline and at -O0
 * (where no optimizer extension points fire). */
static RegisterStandardPasses RegisterCompTransPass(
    PassManagerBuilder::EP_OptimizerLast, registerCompTransPass);

static RegisterStandardPasses RegisterCompTransPass0(
    PassManagerBuilder::EP_EnabledOnOptLevel0, registerCompTransPass);

#if LLVM_VERSION_MAJOR >= 11
/* LLVM 11+ exposes a dedicated LTO extension point; hook it as well */
static RegisterStandardPasses RegisterCompTransPassLTO(
    PassManagerBuilder::EP_FullLinkTimeOptimizationLast, registerCompTransPass);
#endif
+
diff --git a/instrumentation/llvm-ngram-coverage.h b/instrumentation/llvm-ngram-coverage.h
new file mode 100644
index 00000000..12b666e9
--- /dev/null
+++ b/instrumentation/llvm-ngram-coverage.h
@@ -0,0 +1,18 @@
#ifndef AFL_NGRAM_CONFIG_H
#define AFL_NGRAM_CONFIG_H

#include "../config.h"

/* PREV_LOC_T is the storage type for one previous-location entry in the
   ngram coverage history: the smallest unsigned type wide enough to hold a
   coverage-map index for the configured MAP_SIZE_POW2. */
#if (MAP_SIZE_POW2 <= 16)
typedef u16 PREV_LOC_T;
#elif (MAP_SIZE_POW2 <= 32)
typedef u32 PREV_LOC_T;
#else
typedef u64 PREV_LOC_T;
#endif

/* Maximum ngram size (upper bound for AFL_LLVM_NGRAM_SIZE) */
#define NGRAM_SIZE_MAX 16U

#endif
+
diff --git a/instrumentation/split-compares-pass.so.cc b/instrumentation/split-compares-pass.so.cc
new file mode 100644
index 00000000..3f05dd97
--- /dev/null
+++ b/instrumentation/split-compares-pass.so.cc
@@ -0,0 +1,1365 @@
+/*
+ * Copyright 2016 laf-intel
+ * extended for floating point by Heiko Eißfeldt
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <list>
+#include <string>
+#include <fstream>
+#include <sys/time.h>
+
+#include "llvm/Config/llvm-config.h"
+
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/IR/Module.h"
+
+#include "llvm/IR/IRBuilder.h"
+#if LLVM_VERSION_MAJOR > 3 || \
+ (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4)
+ #include "llvm/IR/Verifier.h"
+ #include "llvm/IR/DebugInfo.h"
+#else
+ #include "llvm/Analysis/Verifier.h"
+ #include "llvm/DebugInfo.h"
+ #define nullptr 0
+#endif
+
+using namespace llvm;
+#include "afl-llvm-common.h"
+
+namespace {
+
/* Legacy module pass (laf-intel) that simplifies and splits integer and
 * floating-point comparisons so each partial comparison yields its own
 * coverage feedback. */
class SplitComparesTransform : public ModulePass {

 public:
  static char ID;
  SplitComparesTransform() : ModulePass(ID) {

    // honour AFL_LLVM_ALLOWLIST/DENYLIST function filtering
    initInstrumentList();

  }

  bool runOnModule(Module &M) override;
#if LLVM_VERSION_MAJOR >= 4
  StringRef getPassName() const override {

#else
  const char *getPassName() const override {

#endif
    return "simplifies and splits ICMP instructions";

  }

 private:
  // nonzero when float splitting (AFL_LLVM_LAF_SPLIT_FLOATS) is enabled
  int enableFPSplit;

  // split integer compares of width `bitw` into narrower compares
  size_t splitIntCompares(Module &M, unsigned bitw);
  // split fcmps into sign/exponent/fraction integer compares
  size_t splitFPCompares(Module &M);
  // rewrite >= / <= icmps into strict compare + equality
  bool simplifyCompares(Module &M);
  // rewrite >= / <= fcmps into strict compare + equality
  bool simplifyFPCompares(Module &M);
  // rewrite signed compares into equivalent unsigned form
  bool simplifyIntSignedness(Module &M);
  // round a byte count up to the next power of two
  size_t nextPowerOfTwo(size_t in);

};
+
+} // namespace
+
/* pass identity token for LLVM's registry -- the address matters, not the value */
char SplitComparesTransform::ID = 0;
+
/* This function splits FCMP instructions with xGE or xLE predicates into two
 * FCMP instructions with predicate xGT or xLT and EQ, wired together with a
 * PHI, so each half produces its own branch/coverage feedback.
 * Returns true iff at least one fcmp was rewritten. */
bool SplitComparesTransform::simplifyFPCompares(Module &M) {

  LLVMContext & C = M.getContext();
  std::vector<Instruction *> fcomps;
  IntegerType * Int1Ty = IntegerType::getInt1Ty(C);

  /* iterate over all functions, bbs and instruction and add
   * all integer comparisons with >= and <= predicates to the icomps vector */
  for (auto &F : M) {

    if (!isInInstrumentList(&F)) continue;

    for (auto &BB : F) {

      for (auto &IN : BB) {

        CmpInst *selectcmpInst = nullptr;

        if ((selectcmpInst = dyn_cast<CmpInst>(&IN))) {

          /* only the four ordered/unordered GE/LE forms are split here */
          if (enableFPSplit &&
              (selectcmpInst->getPredicate() == CmpInst::FCMP_OGE ||
               selectcmpInst->getPredicate() == CmpInst::FCMP_UGE ||
               selectcmpInst->getPredicate() == CmpInst::FCMP_OLE ||
               selectcmpInst->getPredicate() == CmpInst::FCMP_ULE)) {

            auto op0 = selectcmpInst->getOperand(0);
            auto op1 = selectcmpInst->getOperand(1);

            Type *TyOp0 = op0->getType();
            Type *TyOp1 = op1->getType();

            /* this is probably not needed but we do it anyway */
            if (TyOp0 != TyOp1) { continue; }

            /* vector/array compares are not handled by this transform */
            if (TyOp0->isArrayTy() || TyOp0->isVectorTy()) { continue; }

            fcomps.push_back(selectcmpInst);

          }

        }

      }

    }

  }

  if (!fcomps.size()) { return false; }

  /* transform for floating point */
  for (auto &FcmpInst : fcomps) {

    BasicBlock *bb = FcmpInst->getParent();

    auto op0 = FcmpInst->getOperand(0);
    auto op1 = FcmpInst->getOperand(1);

    /* find out what the new predicate is going to be */
    auto pred = dyn_cast<CmpInst>(FcmpInst)->getPredicate();
    CmpInst::Predicate new_pred;
    switch (pred) {

      case CmpInst::FCMP_UGE:
        new_pred = CmpInst::FCMP_UGT;
        break;
      case CmpInst::FCMP_OGE:
        new_pred = CmpInst::FCMP_OGT;
        break;
      case CmpInst::FCMP_ULE:
        new_pred = CmpInst::FCMP_ULT;
        break;
      case CmpInst::FCMP_OLE:
        new_pred = CmpInst::FCMP_OLT;
        break;
      default:  // keep the compiler happy
        continue;

    }

    /* split before the fcmp instruction */
    BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(FcmpInst));

    /* the old bb now contains a unconditional jump to the new one (end_bb)
     * we need to delete it later */

    /* create the FCMP instruction with new_pred and add it to the old basic
     * block bb it is now at the position where the old FcmpInst was */
    Instruction *fcmp_np;
    fcmp_np = CmpInst::Create(Instruction::FCmp, new_pred, op0, op1);
    bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()),
                             fcmp_np);

    /* create a new basic block which holds the new EQ fcmp */
    Instruction *fcmp_eq;
    /* insert middle_bb before end_bb */
    BasicBlock *middle_bb =
        BasicBlock::Create(C, "injected", end_bb->getParent(), end_bb);
    fcmp_eq = CmpInst::Create(Instruction::FCmp, CmpInst::FCMP_OEQ, op0, op1);
    middle_bb->getInstList().push_back(fcmp_eq);
    /* add an unconditional branch to the end of middle_bb with destination
     * end_bb */
    BranchInst::Create(end_bb, middle_bb);

    /* replace the uncond branch with a conditional one, which depends on the
     * new_pred fcmp. True goes to end, false to the middle (injected) bb */
    auto term = bb->getTerminator();
    BranchInst::Create(end_bb, middle_bb, fcmp_np, bb);
    term->eraseFromParent();

    /* replace the old FcmpInst (which is the first inst in end_bb) with a PHI
     * inst to wire up the loose ends */
    PHINode *PN = PHINode::Create(Int1Ty, 2, "");
    /* the first result depends on the outcome of fcmp_eq */
    PN->addIncoming(fcmp_eq, middle_bb);
    /* if the source was the original bb we know that the fcmp_np yielded true
     * hence we can hardcode this value */
    PN->addIncoming(ConstantInt::get(Int1Ty, 1), bb);
    /* replace the old FcmpInst with our new and shiny PHI inst */
    BasicBlock::iterator ii(FcmpInst);
    ReplaceInstWithInst(FcmpInst->getParent()->getInstList(), ii, PN);

  }

  return true;

}
+
/* This function splits ICMP instructions with xGE or xLE predicates into two
 * ICMP instructions with predicate xGT or xLT and EQ, wired together with a
 * PHI (integer twin of simplifyFPCompares above).
 * Returns true iff at least one icmp was rewritten. */
bool SplitComparesTransform::simplifyCompares(Module &M) {

  LLVMContext & C = M.getContext();
  std::vector<Instruction *> icomps;
  IntegerType * Int1Ty = IntegerType::getInt1Ty(C);

  /* iterate over all functions, bbs and instruction and add
   * all integer comparisons with >= and <= predicates to the icomps vector */
  for (auto &F : M) {

    if (!isInInstrumentList(&F)) continue;

    for (auto &BB : F) {

      for (auto &IN : BB) {

        CmpInst *selectcmpInst = nullptr;

        if ((selectcmpInst = dyn_cast<CmpInst>(&IN))) {

          if (selectcmpInst->getPredicate() == CmpInst::ICMP_UGE ||
              selectcmpInst->getPredicate() == CmpInst::ICMP_SGE ||
              selectcmpInst->getPredicate() == CmpInst::ICMP_ULE ||
              selectcmpInst->getPredicate() == CmpInst::ICMP_SLE) {

            auto op0 = selectcmpInst->getOperand(0);
            auto op1 = selectcmpInst->getOperand(1);

            IntegerType *intTyOp0 = dyn_cast<IntegerType>(op0->getType());
            IntegerType *intTyOp1 = dyn_cast<IntegerType>(op1->getType());

            /* this is probably not needed but we do it anyway */
            if (!intTyOp0 || !intTyOp1) { continue; }

            icomps.push_back(selectcmpInst);

          }

        }

      }

    }

  }

  if (!icomps.size()) { return false; }

  for (auto &IcmpInst : icomps) {

    BasicBlock *bb = IcmpInst->getParent();

    auto op0 = IcmpInst->getOperand(0);
    auto op1 = IcmpInst->getOperand(1);

    /* find out what the new predicate is going to be */
    auto pred = dyn_cast<CmpInst>(IcmpInst)->getPredicate();
    CmpInst::Predicate new_pred;
    switch (pred) {

      case CmpInst::ICMP_UGE:
        new_pred = CmpInst::ICMP_UGT;
        break;
      case CmpInst::ICMP_SGE:
        new_pred = CmpInst::ICMP_SGT;
        break;
      case CmpInst::ICMP_ULE:
        new_pred = CmpInst::ICMP_ULT;
        break;
      case CmpInst::ICMP_SLE:
        new_pred = CmpInst::ICMP_SLT;
        break;
      default:  // keep the compiler happy
        continue;

    }

    /* split before the icmp instruction */
    BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(IcmpInst));

    /* the old bb now contains a unconditional jump to the new one (end_bb)
     * we need to delete it later */

    /* create the ICMP instruction with new_pred and add it to the old basic
     * block bb it is now at the position where the old IcmpInst was */
    Instruction *icmp_np;
    icmp_np = CmpInst::Create(Instruction::ICmp, new_pred, op0, op1);
    bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()),
                             icmp_np);

    /* create a new basic block which holds the new EQ icmp */
    Instruction *icmp_eq;
    /* insert middle_bb before end_bb */
    BasicBlock *middle_bb =
        BasicBlock::Create(C, "injected", end_bb->getParent(), end_bb);
    icmp_eq = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, op0, op1);
    middle_bb->getInstList().push_back(icmp_eq);
    /* add an unconditional branch to the end of middle_bb with destination
     * end_bb */
    BranchInst::Create(end_bb, middle_bb);

    /* replace the uncond branch with a conditional one, which depends on the
     * new_pred icmp. True goes to end, false to the middle (injected) bb */
    auto term = bb->getTerminator();
    BranchInst::Create(end_bb, middle_bb, icmp_np, bb);
    term->eraseFromParent();

    /* replace the old IcmpInst (which is the first inst in end_bb) with a PHI
     * inst to wire up the loose ends */
    PHINode *PN = PHINode::Create(Int1Ty, 2, "");
    /* the first result depends on the outcome of icmp_eq */
    PN->addIncoming(icmp_eq, middle_bb);
    /* if the source was the original bb we know that the icmp_np yielded true
     * hence we can hardcode this value */
    PN->addIncoming(ConstantInt::get(Int1Ty, 1), bb);
    /* replace the old IcmpInst with our new and shiny PHI inst */
    BasicBlock::iterator ii(IcmpInst);
    ReplaceInstWithInst(IcmpInst->getParent()->getInstList(), ii, PN);

  }

  return true;

}
+
/* this function transforms signed compares to equivalent unsigned compares:
 * the sign bits of both operands are compared first, and only when they are
 * equal does an unsigned compare of the full values decide the result.
 * Returns true iff at least one icmp was rewritten. */
bool SplitComparesTransform::simplifyIntSignedness(Module &M) {

  LLVMContext & C = M.getContext();
  std::vector<Instruction *> icomps;
  IntegerType * Int1Ty = IntegerType::getInt1Ty(C);

  /* iterate over all functions, bbs and instructions and add
   * all signed compares to icomps vector */
  for (auto &F : M) {

    if (!isInInstrumentList(&F)) continue;

    for (auto &BB : F) {

      for (auto &IN : BB) {

        CmpInst *selectcmpInst = nullptr;

        if ((selectcmpInst = dyn_cast<CmpInst>(&IN))) {

          /* only strict SGT/SLT remain after simplifyCompares() ran */
          if (selectcmpInst->getPredicate() == CmpInst::ICMP_SGT ||
              selectcmpInst->getPredicate() == CmpInst::ICMP_SLT) {

            auto op0 = selectcmpInst->getOperand(0);
            auto op1 = selectcmpInst->getOperand(1);

            IntegerType *intTyOp0 = dyn_cast<IntegerType>(op0->getType());
            IntegerType *intTyOp1 = dyn_cast<IntegerType>(op1->getType());

            /* see above */
            if (!intTyOp0 || !intTyOp1) { continue; }

            /* i think this is not possible but to lazy to look it up */
            if (intTyOp0->getBitWidth() != intTyOp1->getBitWidth()) {

              continue;

            }

            icomps.push_back(selectcmpInst);

          }

        }

      }

    }

  }

  if (!icomps.size()) { return false; }

  for (auto &IcmpInst : icomps) {

    BasicBlock *bb = IcmpInst->getParent();

    auto op0 = IcmpInst->getOperand(0);
    auto op1 = IcmpInst->getOperand(1);

    IntegerType *intTyOp0 = dyn_cast<IntegerType>(op0->getType());
    unsigned bitw = intTyOp0->getBitWidth();
    IntegerType *IntType = IntegerType::get(C, bitw);

    /* get the new predicate */
    auto pred = dyn_cast<CmpInst>(IcmpInst)->getPredicate();
    CmpInst::Predicate new_pred;
    if (pred == CmpInst::ICMP_SGT) {

      new_pred = CmpInst::ICMP_UGT;

    } else {

      new_pred = CmpInst::ICMP_ULT;

    }

    BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(IcmpInst));

    /* create a 1 bit compare for the sign bit. to do this shift and trunc
     * the original operands so only the first bit remains.*/
    Instruction *s_op0, *t_op0, *s_op1, *t_op1, *icmp_sign_bit;

    s_op0 = BinaryOperator::Create(Instruction::LShr, op0,
                                   ConstantInt::get(IntType, bitw - 1));
    bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op0);
    t_op0 = new TruncInst(s_op0, Int1Ty);
    bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), t_op0);

    s_op1 = BinaryOperator::Create(Instruction::LShr, op1,
                                   ConstantInt::get(IntType, bitw - 1));
    bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op1);
    t_op1 = new TruncInst(s_op1, Int1Ty);
    bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), t_op1);

    /* compare of the sign bits */
    icmp_sign_bit =
        CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, t_op0, t_op1);
    bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()),
                             icmp_sign_bit);

    /* create a new basic block which is executed if the signedness bit is
     * different */
    Instruction *icmp_inv_sig_cmp;
    BasicBlock * sign_bb =
        BasicBlock::Create(C, "sign", end_bb->getParent(), end_bb);
    if (pred == CmpInst::ICMP_SGT) {

      /* if we check for > and the op0 positive and op1 negative then the final
       * result is true. if op0 negative and op1 pos, the cmp must result
       * in false
       */
      icmp_inv_sig_cmp =
          CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, t_op0, t_op1);

    } else {

      /* just the inverse of the above statement */
      icmp_inv_sig_cmp =
          CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT, t_op0, t_op1);

    }

    sign_bb->getInstList().push_back(icmp_inv_sig_cmp);
    BranchInst::Create(end_bb, sign_bb);

    /* create a new bb which is executed if signedness is equal */
    Instruction *icmp_usign_cmp;
    BasicBlock * middle_bb =
        BasicBlock::Create(C, "injected", end_bb->getParent(), end_bb);
    /* we can do a normal unsigned compare now */
    icmp_usign_cmp = CmpInst::Create(Instruction::ICmp, new_pred, op0, op1);
    middle_bb->getInstList().push_back(icmp_usign_cmp);
    BranchInst::Create(end_bb, middle_bb);

    auto term = bb->getTerminator();
    /* if the sign is eq do a normal unsigned cmp, else we have to check the
     * signedness bit */
    BranchInst::Create(middle_bb, sign_bb, icmp_sign_bit, bb);
    term->eraseFromParent();

    /* merge both paths' results and replace the original signed compare */
    PHINode *PN = PHINode::Create(Int1Ty, 2, "");

    PN->addIncoming(icmp_usign_cmp, middle_bb);
    PN->addIncoming(icmp_inv_sig_cmp, sign_bb);

    BasicBlock::iterator ii(IcmpInst);
    ReplaceInstWithInst(IcmpInst->getParent()->getInstList(), ii, PN);

  }

  return true;

}
+
+size_t SplitComparesTransform::nextPowerOfTwo(size_t in) {
+
+ --in;
+ in |= in >> 1;
+ in |= in >> 2;
+ in |= in >> 4;
+ // in |= in >> 8;
+ // in |= in >> 16;
+ return in + 1;
+
+}
+
+/* splits each supported fcmp (eq/ne/gt/lt, ordered and unordered) into a
+ * cascade of integer compares on the sign bit, the exponent and the mantissa
+ * of the IEEE-754 bit pattern, so coverage-guided fuzzing can solve float
+ * comparisons incrementally. Returns the number of fcmps transformed. */
+size_t SplitComparesTransform::splitFPCompares(Module &M) {
+
+ size_t count = 0;
+
+ LLVMContext &C = M.getContext();
+
+#if LLVM_VERSION_MAJOR > 3 || \
+ (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7)
+ const DataLayout &dl = M.getDataLayout();
+
+ /* only little- and big-endian layouts are handled; on an unknown
+ * endianness the (sign, exponent, mantissa) bit positions would be
+ * unpredictable, so bail out
+ */
+ if (dl.isLittleEndian()) {
+
+ } else if (dl.isBigEndian()) {
+
+ } else {
+
+ return count;
+
+ }
+
+#endif
+
+ std::vector<CmpInst *> fcomps;
+
+ /* get all EQ, NE, GT, and LT fcmps. if the other two
+ * functions were executed only these four predicates should exist */
+ for (auto &F : M) {
+
+ if (!isInInstrumentList(&F)) continue;
+
+ for (auto &BB : F) {
+
+ for (auto &IN : BB) {
+
+ CmpInst *selectcmpInst = nullptr;
+
+ if ((selectcmpInst = dyn_cast<CmpInst>(&IN))) {
+
+ if (selectcmpInst->getPredicate() == CmpInst::FCMP_OEQ ||
+ selectcmpInst->getPredicate() == CmpInst::FCMP_ONE ||
+ selectcmpInst->getPredicate() == CmpInst::FCMP_UNE ||
+ selectcmpInst->getPredicate() == CmpInst::FCMP_UGT ||
+ selectcmpInst->getPredicate() == CmpInst::FCMP_OGT ||
+ selectcmpInst->getPredicate() == CmpInst::FCMP_ULT ||
+ selectcmpInst->getPredicate() == CmpInst::FCMP_OLT) {
+
+ auto op0 = selectcmpInst->getOperand(0);
+ auto op1 = selectcmpInst->getOperand(1);
+
+ Type *TyOp0 = op0->getType();
+ Type *TyOp1 = op1->getType();
+
+ /* both operands must be scalar floats of the same type */
+ if (TyOp0 != TyOp1) { continue; }
+
+ if (TyOp0->isArrayTy() || TyOp0->isVectorTy()) { continue; }
+
+ fcomps.push_back(selectcmpInst);
+
+ }
+
+ }
+
+ }
+
+ }
+
+ }
+
+ if (!fcomps.size()) { return count; }
+
+ IntegerType *Int1Ty = IntegerType::getInt1Ty(C);
+
+ for (auto &FcmpInst : fcomps) {
+
+ BasicBlock *bb = FcmpInst->getParent();
+
+ auto op0 = FcmpInst->getOperand(0);
+ auto op1 = FcmpInst->getOperand(1);
+
+ unsigned op_size;
+ op_size = op0->getType()->getPrimitiveSizeInBits();
+
+ if (op_size != op1->getType()->getPrimitiveSizeInBits()) { continue; }
+
+ /* precision = mantissa bits incl. the implicit leading one, per
+ * IEEE 754: half 11, single 24, double 53, x87 extended 65, quad 113 */
+ const unsigned int sizeInBits = op0->getType()->getPrimitiveSizeInBits();
+ const unsigned int precision =
+ sizeInBits == 32
+ ? 24
+ : sizeInBits == 64
+ ? 53
+ : sizeInBits == 128 ? 113
+ : sizeInBits == 16 ? 11
+ /* sizeInBits == 80 */
+ : 65;
+
+ const unsigned shiftR_exponent = precision - 1;
+ /* mask_fraction selects the mantissa bits, mask_exponent the exponent
+ * bits that remain after the mantissa has been shifted out */
+ const unsigned long long mask_fraction =
+ (1ULL << (shiftR_exponent - 1)) | ((1ULL << (shiftR_exponent - 1)) - 1);
+ const unsigned long long mask_exponent =
+ (1ULL << (sizeInBits - precision)) - 1;
+
+ // round up sizes to the next power of two
+ // this should help with integer compare splitting
+ size_t exTySizeBytes = ((sizeInBits - precision + 7) >> 3);
+ size_t frTySizeBytes = ((precision - 1ULL + 7) >> 3);
+
+ IntegerType *IntExponentTy =
+ IntegerType::get(C, nextPowerOfTwo(exTySizeBytes) << 3);
+ IntegerType *IntFractionTy =
+ IntegerType::get(C, nextPowerOfTwo(frTySizeBytes) << 3);
+
+ // errs() << "Fractions: IntFractionTy size " <<
+ // IntFractionTy->getPrimitiveSizeInBits() << ", op_size " << op_size <<
+ // ", mask " << mask_fraction <<
+ // ", precision " << precision << "\n";
+
+ BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(FcmpInst));
+
+ /* create the integers from floats directly */
+ Instruction *b_op0, *b_op1;
+ b_op0 = CastInst::Create(Instruction::BitCast, op0,
+ IntegerType::get(C, op_size));
+ bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), b_op0);
+
+ b_op1 = CastInst::Create(Instruction::BitCast, op1,
+ IntegerType::get(C, op_size));
+ bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), b_op1);
+
+ /* isolate signs of value of floating point type */
+
+ /* create a 1 bit compare for the sign bit. to do this shift and trunc
+ * the original operands so only the first bit remains.*/
+ Instruction *s_s0, *t_s0, *s_s1, *t_s1, *icmp_sign_bit;
+
+ s_s0 =
+ BinaryOperator::Create(Instruction::LShr, b_op0,
+ ConstantInt::get(b_op0->getType(), op_size - 1));
+ bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_s0);
+ t_s0 = new TruncInst(s_s0, Int1Ty);
+ bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), t_s0);
+
+ s_s1 =
+ BinaryOperator::Create(Instruction::LShr, b_op1,
+ ConstantInt::get(b_op1->getType(), op_size - 1));
+ bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_s1);
+ t_s1 = new TruncInst(s_s1, Int1Ty);
+ bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), t_s1);
+
+ /* compare of the sign bits */
+ icmp_sign_bit =
+ CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, t_s0, t_s1);
+ bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()),
+ icmp_sign_bit);
+
+ /* create a new basic block which is executed if the signedness bits are
+ * equal */
+ BasicBlock *signequal_bb =
+ BasicBlock::Create(C, "signequal", end_bb->getParent(), end_bb);
+
+ BranchInst::Create(end_bb, signequal_bb);
+
+ /* create a new bb which is executed if exponents are satisfying the compare
+ */
+ BasicBlock *middle_bb =
+ BasicBlock::Create(C, "injected", end_bb->getParent(), end_bb);
+
+ BranchInst::Create(end_bb, middle_bb);
+
+ auto term = bb->getTerminator();
+ /* if the signs are different goto end_bb else to signequal_bb */
+ BranchInst::Create(signequal_bb, end_bb, icmp_sign_bit, bb);
+ term->eraseFromParent();
+
+ /* insert code for equal signs */
+
+ /* isolate the exponents */
+ Instruction *s_e0, *m_e0, *t_e0, *s_e1, *m_e1, *t_e1;
+
+ s_e0 = BinaryOperator::Create(
+ Instruction::LShr, b_op0,
+ ConstantInt::get(b_op0->getType(), shiftR_exponent));
+ s_e1 = BinaryOperator::Create(
+ Instruction::LShr, b_op1,
+ ConstantInt::get(b_op1->getType(), shiftR_exponent));
+ signequal_bb->getInstList().insert(
+ BasicBlock::iterator(signequal_bb->getTerminator()), s_e0);
+ signequal_bb->getInstList().insert(
+ BasicBlock::iterator(signequal_bb->getTerminator()), s_e1);
+
+ t_e0 = new TruncInst(s_e0, IntExponentTy);
+ t_e1 = new TruncInst(s_e1, IntExponentTy);
+ signequal_bb->getInstList().insert(
+ BasicBlock::iterator(signequal_bb->getTerminator()), t_e0);
+ signequal_bb->getInstList().insert(
+ BasicBlock::iterator(signequal_bb->getTerminator()), t_e1);
+
+ /* mask out the sign bit if it still sits inside the truncated value */
+ if (sizeInBits - precision < exTySizeBytes * 8) {
+
+ m_e0 = BinaryOperator::Create(
+ Instruction::And, t_e0,
+ ConstantInt::get(t_e0->getType(), mask_exponent));
+ m_e1 = BinaryOperator::Create(
+ Instruction::And, t_e1,
+ ConstantInt::get(t_e1->getType(), mask_exponent));
+ signequal_bb->getInstList().insert(
+ BasicBlock::iterator(signequal_bb->getTerminator()), m_e0);
+ signequal_bb->getInstList().insert(
+ BasicBlock::iterator(signequal_bb->getTerminator()), m_e1);
+
+ } else {
+
+ m_e0 = t_e0;
+ m_e1 = t_e1;
+
+ }
+
+ /* compare the exponents of the operands */
+ Instruction *icmp_exponents_equal;
+ Instruction *icmp_exponent_result;
+ BasicBlock * signequal2_bb = signequal_bb;
+ switch (FcmpInst->getPredicate()) {
+
+ case CmpInst::FCMP_OEQ:
+ icmp_exponent_result =
+ CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, m_e0, m_e1);
+ break;
+ case CmpInst::FCMP_ONE:
+ case CmpInst::FCMP_UNE:
+ icmp_exponent_result =
+ CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_NE, m_e0, m_e1);
+ break;
+ /* compare the exponents of the operands (signs are equal)
+ * if exponents are equal -> proceed to mantissa comparison
+ * else get result depending on sign
+ */
+ case CmpInst::FCMP_OGT:
+ case CmpInst::FCMP_UGT:
+ Instruction *icmp_exponent;
+ icmp_exponents_equal =
+ CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, m_e0, m_e1);
+ signequal_bb->getInstList().insert(
+ BasicBlock::iterator(signequal_bb->getTerminator()),
+ icmp_exponents_equal);
+
+ // shortcut for unequal exponents
+ signequal2_bb = signequal_bb->splitBasicBlock(
+ BasicBlock::iterator(signequal_bb->getTerminator()));
+
+ /* if the exponents are equal goto middle_bb else to signequal2_bb */
+ term = signequal_bb->getTerminator();
+ BranchInst::Create(middle_bb, signequal2_bb, icmp_exponents_equal,
+ signequal_bb);
+ term->eraseFromParent();
+
+ /* xor with the sign bit: for negative values a bigger exponent
+ * means a smaller value */
+ icmp_exponent =
+ CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT, m_e0, m_e1);
+ signequal2_bb->getInstList().insert(
+ BasicBlock::iterator(signequal2_bb->getTerminator()),
+ icmp_exponent);
+ icmp_exponent_result =
+ BinaryOperator::Create(Instruction::Xor, icmp_exponent, t_s0);
+ break;
+ case CmpInst::FCMP_OLT:
+ case CmpInst::FCMP_ULT:
+ icmp_exponents_equal =
+ CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, m_e0, m_e1);
+ signequal_bb->getInstList().insert(
+ BasicBlock::iterator(signequal_bb->getTerminator()),
+ icmp_exponents_equal);
+
+ // shortcut for unequal exponents
+ signequal2_bb = signequal_bb->splitBasicBlock(
+ BasicBlock::iterator(signequal_bb->getTerminator()));
+
+ /* if the exponents are equal goto middle_bb else to signequal2_bb */
+ term = signequal_bb->getTerminator();
+ BranchInst::Create(middle_bb, signequal2_bb, icmp_exponents_equal,
+ signequal_bb);
+ term->eraseFromParent();
+
+ icmp_exponent =
+ CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, m_e0, m_e1);
+ signequal2_bb->getInstList().insert(
+ BasicBlock::iterator(signequal2_bb->getTerminator()),
+ icmp_exponent);
+ icmp_exponent_result =
+ BinaryOperator::Create(Instruction::Xor, icmp_exponent, t_s0);
+ break;
+ default:
+ continue;
+
+ }
+
+ signequal2_bb->getInstList().insert(
+ BasicBlock::iterator(signequal2_bb->getTerminator()),
+ icmp_exponent_result);
+
+ {
+
+ term = signequal2_bb->getTerminator();
+
+ switch (FcmpInst->getPredicate()) {
+
+ case CmpInst::FCMP_OEQ:
+ /* if the exponents are satisfying the compare do a fraction cmp in
+ * middle_bb */
+ BranchInst::Create(middle_bb, end_bb, icmp_exponent_result,
+ signequal2_bb);
+ break;
+ case CmpInst::FCMP_ONE:
+ case CmpInst::FCMP_UNE:
+ /* if the exponents are satisfying the compare do a fraction cmp in
+ * middle_bb */
+ BranchInst::Create(end_bb, middle_bb, icmp_exponent_result,
+ signequal2_bb);
+ break;
+ case CmpInst::FCMP_OGT:
+ case CmpInst::FCMP_UGT:
+ case CmpInst::FCMP_OLT:
+ case CmpInst::FCMP_ULT:
+ BranchInst::Create(end_bb, signequal2_bb);
+ break;
+ default:
+ continue;
+
+ }
+
+ term->eraseFromParent();
+
+ }
+
+ /* isolate the mantissa aka fraction */
+ Instruction *t_f0, *t_f1;
+ bool needTrunc = IntFractionTy->getPrimitiveSizeInBits() < op_size;
+
+ if (precision - 1 < frTySizeBytes * 8) {
+
+ Instruction *m_f0, *m_f1;
+ m_f0 = BinaryOperator::Create(
+ Instruction::And, b_op0,
+ ConstantInt::get(b_op0->getType(), mask_fraction));
+ m_f1 = BinaryOperator::Create(
+ Instruction::And, b_op1,
+ ConstantInt::get(b_op1->getType(), mask_fraction));
+ middle_bb->getInstList().insert(
+ BasicBlock::iterator(middle_bb->getTerminator()), m_f0);
+ middle_bb->getInstList().insert(
+ BasicBlock::iterator(middle_bb->getTerminator()), m_f1);
+
+ if (needTrunc) {
+
+ t_f0 = new TruncInst(m_f0, IntFractionTy);
+ t_f1 = new TruncInst(m_f1, IntFractionTy);
+ middle_bb->getInstList().insert(
+ BasicBlock::iterator(middle_bb->getTerminator()), t_f0);
+ middle_bb->getInstList().insert(
+ BasicBlock::iterator(middle_bb->getTerminator()), t_f1);
+
+ } else {
+
+ t_f0 = m_f0;
+ t_f1 = m_f1;
+
+ }
+
+ } else {
+
+ if (needTrunc) {
+
+ t_f0 = new TruncInst(b_op0, IntFractionTy);
+ t_f1 = new TruncInst(b_op1, IntFractionTy);
+ middle_bb->getInstList().insert(
+ BasicBlock::iterator(middle_bb->getTerminator()), t_f0);
+ middle_bb->getInstList().insert(
+ BasicBlock::iterator(middle_bb->getTerminator()), t_f1);
+
+ } else {
+
+ t_f0 = b_op0;
+ t_f1 = b_op1;
+
+ }
+
+ }
+
+ /* compare the fractions of the operands */
+ Instruction *icmp_fraction_result;
+ Instruction *icmp_fraction_result2;
+ BasicBlock * middle2_bb = middle_bb;
+ PHINode * PN2 = nullptr;
+ switch (FcmpInst->getPredicate()) {
+
+ case CmpInst::FCMP_OEQ:
+ icmp_fraction_result =
+ CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, t_f0, t_f1);
+ middle2_bb->getInstList().insert(
+ BasicBlock::iterator(middle2_bb->getTerminator()),
+ icmp_fraction_result);
+
+ break;
+ case CmpInst::FCMP_UNE:
+ case CmpInst::FCMP_ONE:
+ icmp_fraction_result =
+ CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_NE, t_f0, t_f1);
+ middle2_bb->getInstList().insert(
+ BasicBlock::iterator(middle2_bb->getTerminator()),
+ icmp_fraction_result);
+
+ break;
+ case CmpInst::FCMP_OGT:
+ case CmpInst::FCMP_UGT:
+ case CmpInst::FCMP_OLT:
+ case CmpInst::FCMP_ULT: {
+
+ middle2_bb = middle_bb->splitBasicBlock(
+ BasicBlock::iterator(middle_bb->getTerminator()));
+
+ /* the mantissa comparison direction flips for negative values, so
+ * branch on the (already known to be shared) sign bit first */
+ BasicBlock *negative_bb = BasicBlock::Create(
+ C, "negative_value", middle2_bb->getParent(), middle2_bb);
+ BasicBlock *positive_bb = BasicBlock::Create(
+ C, "positive_value", negative_bb->getParent(), negative_bb);
+
+ if (FcmpInst->getPredicate() == CmpInst::FCMP_OGT ||
+ FcmpInst->getPredicate() == CmpInst::FCMP_UGT) {
+
+ negative_bb->getInstList().push_back(
+ icmp_fraction_result = CmpInst::Create(
+ Instruction::ICmp, CmpInst::ICMP_ULT, t_f0, t_f1));
+ positive_bb->getInstList().push_back(
+ icmp_fraction_result2 = CmpInst::Create(
+ Instruction::ICmp, CmpInst::ICMP_UGT, t_f0, t_f1));
+
+ } else {
+
+ negative_bb->getInstList().push_back(
+ icmp_fraction_result = CmpInst::Create(
+ Instruction::ICmp, CmpInst::ICMP_UGT, t_f0, t_f1));
+ positive_bb->getInstList().push_back(
+ icmp_fraction_result2 = CmpInst::Create(
+ Instruction::ICmp, CmpInst::ICMP_ULT, t_f0, t_f1));
+
+ }
+
+ BranchInst::Create(middle2_bb, negative_bb);
+ BranchInst::Create(middle2_bb, positive_bb);
+
+ term = middle_bb->getTerminator();
+ BranchInst::Create(negative_bb, positive_bb, t_s0, middle_bb);
+ term->eraseFromParent();
+
+ PN2 = PHINode::Create(Int1Ty, 2, "");
+ PN2->addIncoming(icmp_fraction_result, negative_bb);
+ PN2->addIncoming(icmp_fraction_result2, positive_bb);
+ middle2_bb->getInstList().insert(
+ BasicBlock::iterator(middle2_bb->getTerminator()), PN2);
+
+ } break;
+
+ default:
+ continue;
+
+ }
+
+ /* merge the three partial results (sign path, exponent path, mantissa
+ * path) and replace the original fcmp with the resulting i1 PHI */
+ PHINode *PN = PHINode::Create(Int1Ty, 3, "");
+
+ switch (FcmpInst->getPredicate()) {
+
+ case CmpInst::FCMP_OEQ:
+ /* unequal signs cannot be equal values */
+ /* goto false branch */
+ PN->addIncoming(ConstantInt::get(Int1Ty, 0), bb);
+ /* unequal exponents cannot be equal values, too */
+ PN->addIncoming(ConstantInt::get(Int1Ty, 0), signequal_bb);
+ /* fractions comparison */
+ PN->addIncoming(icmp_fraction_result, middle2_bb);
+ break;
+ case CmpInst::FCMP_ONE:
+ case CmpInst::FCMP_UNE:
+ /* unequal signs are unequal values */
+ /* goto true branch */
+ PN->addIncoming(ConstantInt::get(Int1Ty, 1), bb);
+ /* unequal exponents are unequal values, too */
+ PN->addIncoming(icmp_exponent_result, signequal_bb);
+ /* fractions comparison */
+ PN->addIncoming(icmp_fraction_result, middle2_bb);
+ break;
+ case CmpInst::FCMP_OGT:
+ case CmpInst::FCMP_UGT:
+ /* if op1 is negative goto true branch,
+ else go on comparing */
+ PN->addIncoming(t_s1, bb);
+ PN->addIncoming(icmp_exponent_result, signequal2_bb);
+ PN->addIncoming(PN2, middle2_bb);
+ break;
+ case CmpInst::FCMP_OLT:
+ case CmpInst::FCMP_ULT:
+ /* if op0 is negative goto true branch,
+ else go on comparing */
+ PN->addIncoming(t_s0, bb);
+ PN->addIncoming(icmp_exponent_result, signequal2_bb);
+ PN->addIncoming(PN2, middle2_bb);
+ break;
+ default:
+ continue;
+
+ }
+
+ BasicBlock::iterator ii(FcmpInst);
+ ReplaceInstWithInst(FcmpInst->getParent()->getInstList(), ii, PN);
+ ++count;
+
+ }
+
+ return count;
+
+}
+
+/* splits each icmp of width bitw into a compare of the upper halves in the
+ * original block plus a compare of the lower halves in injected blocks,
+ * merged with a PHI node. Expects only EQ/NE/UGT/ULT predicates (produced
+ * by the simplify passes). Returns the number of icmps split. */
+size_t SplitComparesTransform::splitIntCompares(Module &M, unsigned bitw) {
+
+ size_t count = 0;
+
+ LLVMContext &C = M.getContext();
+
+ IntegerType *Int1Ty = IntegerType::getInt1Ty(C);
+ IntegerType *OldIntType = IntegerType::get(C, bitw);
+ IntegerType *NewIntType = IntegerType::get(C, bitw / 2);
+
+ std::vector<Instruction *> icomps;
+
+ /* an odd width cannot be halved */
+ if (bitw % 2) { return 0; }
+
+ /* not supported yet */
+ if (bitw > 64) { return 0; }
+
+ /* get all EQ, NE, UGT, and ULT icmps of width bitw. if the
+ * functions simplifyCompares() and simplifyIntSignedness()
+ * were executed only these four predicates should exist */
+ for (auto &F : M) {
+
+ if (!isInInstrumentList(&F)) continue;
+
+ for (auto &BB : F) {
+
+ for (auto &IN : BB) {
+
+ CmpInst *selectcmpInst = nullptr;
+
+ if ((selectcmpInst = dyn_cast<CmpInst>(&IN))) {
+
+ if (selectcmpInst->getPredicate() == CmpInst::ICMP_EQ ||
+ selectcmpInst->getPredicate() == CmpInst::ICMP_NE ||
+ selectcmpInst->getPredicate() == CmpInst::ICMP_UGT ||
+ selectcmpInst->getPredicate() == CmpInst::ICMP_ULT) {
+
+ auto op0 = selectcmpInst->getOperand(0);
+ auto op1 = selectcmpInst->getOperand(1);
+
+ IntegerType *intTyOp0 = dyn_cast<IntegerType>(op0->getType());
+ IntegerType *intTyOp1 = dyn_cast<IntegerType>(op1->getType());
+
+ if (!intTyOp0 || !intTyOp1) { continue; }
+
+ /* check if the bitwidths are the one we are looking for */
+ if (intTyOp0->getBitWidth() != bitw ||
+ intTyOp1->getBitWidth() != bitw) {
+
+ continue;
+
+ }
+
+ icomps.push_back(selectcmpInst);
+
+ }
+
+ }
+
+ }
+
+ }
+
+ }
+
+ if (!icomps.size()) { return 0; }
+
+ for (auto &IcmpInst : icomps) {
+
+ BasicBlock *bb = IcmpInst->getParent();
+
+ auto op0 = IcmpInst->getOperand(0);
+ auto op1 = IcmpInst->getOperand(1);
+
+ auto pred = dyn_cast<CmpInst>(IcmpInst)->getPredicate();
+
+ BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(IcmpInst));
+
+ /* create the comparison of the top halves of the original operands */
+ Instruction *s_op0, *op0_high, *s_op1, *op1_high, *icmp_high;
+
+ s_op0 = BinaryOperator::Create(Instruction::LShr, op0,
+ ConstantInt::get(OldIntType, bitw / 2));
+ bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op0);
+ op0_high = new TruncInst(s_op0, NewIntType);
+ bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()),
+ op0_high);
+
+ s_op1 = BinaryOperator::Create(Instruction::LShr, op1,
+ ConstantInt::get(OldIntType, bitw / 2));
+ bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op1);
+ op1_high = new TruncInst(s_op1, NewIntType);
+ bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()),
+ op1_high);
+
+ icmp_high = CmpInst::Create(Instruction::ICmp, pred, op0_high, op1_high);
+ bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()),
+ icmp_high);
+
+ /* now we have to distinguish between == != and > < */
+ if (pred == CmpInst::ICMP_EQ || pred == CmpInst::ICMP_NE) {
+
+ /* transformation for == and != icmps */
+
+ /* create a compare for the lower half of the original operands */
+ Instruction *op0_low, *op1_low, *icmp_low;
+ BasicBlock * cmp_low_bb =
+ BasicBlock::Create(C, "injected", end_bb->getParent(), end_bb);
+
+ op0_low = new TruncInst(op0, NewIntType);
+ cmp_low_bb->getInstList().push_back(op0_low);
+
+ op1_low = new TruncInst(op1, NewIntType);
+ cmp_low_bb->getInstList().push_back(op1_low);
+
+ icmp_low = CmpInst::Create(Instruction::ICmp, pred, op0_low, op1_low);
+ cmp_low_bb->getInstList().push_back(icmp_low);
+ BranchInst::Create(end_bb, cmp_low_bb);
+
+ /* dependent on the cmp of the high parts go to the end or go on with
+ * the comparison */
+ auto term = bb->getTerminator();
+ if (pred == CmpInst::ICMP_EQ) {
+
+ BranchInst::Create(cmp_low_bb, end_bb, icmp_high, bb);
+
+ } else {
+
+ /* CmpInst::ICMP_NE */
+ BranchInst::Create(end_bb, cmp_low_bb, icmp_high, bb);
+
+ }
+
+ term->eraseFromParent();
+
+ /* create the PHI and connect the edges accordingly */
+ PHINode *PN = PHINode::Create(Int1Ty, 2, "");
+ PN->addIncoming(icmp_low, cmp_low_bb);
+ if (pred == CmpInst::ICMP_EQ) {
+
+ PN->addIncoming(ConstantInt::get(Int1Ty, 0), bb);
+
+ } else {
+
+ /* CmpInst::ICMP_NE */
+ PN->addIncoming(ConstantInt::get(Int1Ty, 1), bb);
+
+ }
+
+ /* replace the old icmp with the new PHI */
+ BasicBlock::iterator ii(IcmpInst);
+ ReplaceInstWithInst(IcmpInst->getParent()->getInstList(), ii, PN);
+
+ } else {
+
+ /* CmpInst::ICMP_UGT and CmpInst::ICMP_ULT */
+ /* transformations for < and > */
+
+ /* create a basic block which checks for the inverse predicate.
+ * if this is true we can go to the end if not we have to go to the
+ * bb which checks the lower half of the operands */
+ Instruction *icmp_inv_cmp, *op0_low, *op1_low, *icmp_low;
+ BasicBlock * inv_cmp_bb =
+ BasicBlock::Create(C, "inv_cmp", end_bb->getParent(), end_bb);
+ if (pred == CmpInst::ICMP_UGT) {
+
+ icmp_inv_cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT,
+ op0_high, op1_high);
+
+ } else {
+
+ icmp_inv_cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT,
+ op0_high, op1_high);
+
+ }
+
+ inv_cmp_bb->getInstList().push_back(icmp_inv_cmp);
+
+ auto term = bb->getTerminator();
+ term->eraseFromParent();
+ BranchInst::Create(end_bb, inv_cmp_bb, icmp_high, bb);
+
+ /* create a bb which handles the cmp of the lower halves */
+ BasicBlock *cmp_low_bb =
+ BasicBlock::Create(C, "injected", end_bb->getParent(), end_bb);
+ op0_low = new TruncInst(op0, NewIntType);
+ cmp_low_bb->getInstList().push_back(op0_low);
+ op1_low = new TruncInst(op1, NewIntType);
+ cmp_low_bb->getInstList().push_back(op1_low);
+
+ icmp_low = CmpInst::Create(Instruction::ICmp, pred, op0_low, op1_low);
+ cmp_low_bb->getInstList().push_back(icmp_low);
+ BranchInst::Create(end_bb, cmp_low_bb);
+
+ BranchInst::Create(end_bb, cmp_low_bb, icmp_inv_cmp, inv_cmp_bb);
+
+ /* high halves decide when they differ (strictly greater/less -> 1,
+ * inverse -> 0), otherwise the low-half compare decides */
+ PHINode *PN = PHINode::Create(Int1Ty, 3);
+ PN->addIncoming(icmp_low, cmp_low_bb);
+ PN->addIncoming(ConstantInt::get(Int1Ty, 1), bb);
+ PN->addIncoming(ConstantInt::get(Int1Ty, 0), inv_cmp_bb);
+
+ BasicBlock::iterator ii(IcmpInst);
+ ReplaceInstWithInst(IcmpInst->getParent()->getInstList(), ii, PN);
+
+ }
+
+ ++count;
+
+ }
+
+ return count;
+
+}
+
+/* pass entry point. Optionally splits FP compares first (when
+ * AFL_LLVM_LAF_SPLIT_FLOATS is set), then canonicalizes integer compares and
+ * splits them, starting from the width configured via
+ * AFL_LLVM_LAF_SPLIT_COMPARES_BITW (default 64; the legacy name
+ * LAF_SPLIT_COMPARES_BITW is honored too) down to 8 bit, by falling through
+ * the 64/32/16 switch cases. Returns false for an unsupported width. */
+bool SplitComparesTransform::runOnModule(Module &M) {
+
+ int bitw = 64;
+ size_t count = 0;
+
+ char *bitw_env = getenv("AFL_LLVM_LAF_SPLIT_COMPARES_BITW");
+ if (!bitw_env) bitw_env = getenv("LAF_SPLIT_COMPARES_BITW");
+ if (bitw_env) { bitw = atoi(bitw_env); }
+
+ enableFPSplit = getenv("AFL_LLVM_LAF_SPLIT_FLOATS") != NULL;
+
+ if ((isatty(2) && getenv("AFL_QUIET") == NULL) ||
+ getenv("AFL_DEBUG") != NULL) {
+
+ printf(
+ "Split-compare-pass by laf.intel@gmail.com, extended by "
+ "heiko@hexco.de\n");
+
+ } else {
+
+ be_quiet = 1;
+
+ }
+
+ if (enableFPSplit) {
+
+ count = splitFPCompares(M);
+
+ /*
+ if (!be_quiet) {
+
+ errs() << "Split-floatingpoint-compare-pass: " << count
+ << " FP comparisons split\n";
+
+ }
+
+ */
+ simplifyFPCompares(M);
+
+ }
+
+ simplifyCompares(M);
+
+ simplifyIntSignedness(M);
+
+ /* any width other than 64/32/16 disables the integer splitting entirely */
+ switch (bitw) {
+
+ case 64:
+ count += splitIntCompares(M, bitw);
+ /*
+ if (!be_quiet)
+ errs() << "Split-integer-compare-pass " << bitw << "bit: " <<
+ count
+ << " split\n";
+ */
+ bitw >>= 1;
+#if LLVM_VERSION_MAJOR > 3 || \
+ (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7)
+ [[clang::fallthrough]]; /*FALLTHRU*/ /* FALLTHROUGH */
+#endif
+ case 32:
+ count += splitIntCompares(M, bitw);
+ /*
+ if (!be_quiet)
+ errs() << "Split-integer-compare-pass " << bitw << "bit: " <<
+ count
+ << " split\n";
+ */
+ bitw >>= 1;
+#if LLVM_VERSION_MAJOR > 3 || \
+ (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7)
+ [[clang::fallthrough]]; /*FALLTHRU*/ /* FALLTHROUGH */
+#endif
+ case 16:
+ count += splitIntCompares(M, bitw);
+ /*
+ if (!be_quiet)
+ errs() << "Split-integer-compare-pass " << bitw << "bit: " <<
+ count
+ << " split\n";
+ */
+ bitw >>= 1;
+ break;
+
+ default:
+ // if (!be_quiet) errs() << "NOT Running split-compare-pass \n";
+ return false;
+ break;
+
+ }
+
+ verifyModule(M);
+ return true;
+
+}
+
+/* callback that adds this pass to the legacy pass-manager pipeline */
+static void registerSplitComparesPass(const PassManagerBuilder &,
+ legacy::PassManagerBase &PM) {
+
+ PM.add(new SplitComparesTransform());
+
+}
+
+/* register the pass at the end of the optimizer pipeline, at -O0, and
+ * (LLVM 11+) at the end of full LTO */
+static RegisterStandardPasses RegisterSplitComparesPass(
+ PassManagerBuilder::EP_OptimizerLast, registerSplitComparesPass);
+
+static RegisterStandardPasses RegisterSplitComparesTransPass0(
+ PassManagerBuilder::EP_EnabledOnOptLevel0, registerSplitComparesPass);
+
+#if LLVM_VERSION_MAJOR >= 11
+static RegisterStandardPasses RegisterSplitComparesTransPassLTO(
+ PassManagerBuilder::EP_FullLinkTimeOptimizationLast,
+ registerSplitComparesPass);
+#endif
+
diff --git a/instrumentation/split-switches-pass.so.cc b/instrumentation/split-switches-pass.so.cc
new file mode 100644
index 00000000..97ab04a4
--- /dev/null
+++ b/instrumentation/split-switches-pass.so.cc
@@ -0,0 +1,450 @@
+/*
+ * Copyright 2016 laf-intel
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <list>
+#include <string>
+#include <fstream>
+#include <sys/time.h>
+
+#include "llvm/Config/llvm-config.h"
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ValueTracking.h"
+
+#include "llvm/IR/IRBuilder.h"
+#if LLVM_VERSION_MAJOR > 3 || \
+ (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4)
+ #include "llvm/IR/Verifier.h"
+ #include "llvm/IR/DebugInfo.h"
+#else
+ #include "llvm/Analysis/Verifier.h"
+ #include "llvm/DebugInfo.h"
+ #define nullptr 0
+#endif
+
+#include <set>
+#include "afl-llvm-common.h"
+
+using namespace llvm;
+
+namespace {
+
+/* legacy ModulePass that rewrites wide switch statements into byte-wise
+ * compare/branch trees, so the fuzzer observes one comparison per byte */
+class SplitSwitchesTransform : public ModulePass {
+
+ public:
+ static char ID;
+ SplitSwitchesTransform() : ModulePass(ID) {
+
+ initInstrumentList();
+
+ }
+
+ bool runOnModule(Module &M) override;
+
+#if LLVM_VERSION_MAJOR >= 4
+ StringRef getPassName() const override {
+
+#else
+ const char *getPassName() const override {
+
+#endif
+ return "splits switch constructs";
+
+ }
+
+ /* one switch case: its constant value and its successor block */
+ struct CaseExpr {
+
+ ConstantInt *Val;
+ BasicBlock * BB;
+
+ CaseExpr(ConstantInt *val = nullptr, BasicBlock *bb = nullptr)
+ : Val(val), BB(bb) {
+
+ }
+
+ };
+
+ typedef std::vector<CaseExpr> CaseVector;
+
+ private:
+ bool splitSwitches(Module &M);
+ bool transformCmps(Module &M, const bool processStrcmp,
+ const bool processMemcmp);
+ /* recursively lowers a case list into a byte-wise compare tree */
+ BasicBlock *switchConvert(CaseVector Cases, std::vector<bool> bytesChecked,
+ BasicBlock *OrigBlock, BasicBlock *NewDefault,
+ Value *Val, unsigned level);
+
+};
+
+} // namespace
+
+/* pass identifier used by LLVM's pass infrastructure (only the address
+ * matters, not the value) */
+char SplitSwitchesTransform::ID = 0;
+
+/* switchConvert - recursively lowers the case list into a tree of single-byte
+ * compares. At each level the unchecked byte position with the fewest
+ * distinct values is chosen; if only one value occurs there, the byte is
+ * tested directly, otherwise the cases are partitioned around a pivot byte.
+ * Returns the root block of the generated subtree. */
+BasicBlock *SplitSwitchesTransform::switchConvert(
+ CaseVector Cases, std::vector<bool> bytesChecked, BasicBlock *OrigBlock,
+ BasicBlock *NewDefault, Value *Val, unsigned level) {
+
+ unsigned ValTypeBitWidth = Cases[0].Val->getBitWidth();
+ IntegerType *ValType =
+ IntegerType::get(OrigBlock->getContext(), ValTypeBitWidth);
+ IntegerType * ByteType = IntegerType::get(OrigBlock->getContext(), 8);
+ unsigned BytesInValue = bytesChecked.size();
+ std::vector<uint8_t> setSizes;
+ std::vector<std::set<uint8_t> > byteSets(BytesInValue, std::set<uint8_t>());
+
+ assert(ValTypeBitWidth >= 8 && ValTypeBitWidth <= 64);
+
+ /* for each of the possible cases we iterate over all bytes of the values
+ * build a set of possible values at each byte position in byteSets */
+ for (CaseExpr &Case : Cases) {
+
+ for (unsigned i = 0; i < BytesInValue; i++) {
+
+ uint8_t byte = (Case.Val->getZExtValue() >> (i * 8)) & 0xFF;
+ byteSets[i].insert(byte);
+
+ }
+
+ }
+
+ /* find the index of the first byte position that was not yet checked. then
+ * save the number of possible values at that byte position */
+ unsigned smallestIndex = 0;
+ unsigned smallestSize = 257;
+ for (unsigned i = 0; i < byteSets.size(); i++) {
+
+ if (bytesChecked[i]) continue;
+ if (byteSets[i].size() < smallestSize) {
+
+ smallestIndex = i;
+ smallestSize = byteSets[i].size();
+
+ }
+
+ }
+
+ assert(bytesChecked[smallestIndex] == false);
+
+ /* there are only smallestSize different bytes at index smallestIndex */
+
+ /* extract the chosen byte from the switched-on value: shift right, then
+ * truncate to i8 unless the value already is 8 bit wide */
+ Instruction *Shift, *Trunc;
+ Function * F = OrigBlock->getParent();
+ BasicBlock * NewNode = BasicBlock::Create(Val->getContext(), "NodeBlock", F);
+ Shift = BinaryOperator::Create(Instruction::LShr, Val,
+ ConstantInt::get(ValType, smallestIndex * 8));
+ NewNode->getInstList().push_back(Shift);
+
+ if (ValTypeBitWidth > 8) {
+
+ Trunc = new TruncInst(Shift, ByteType);
+ NewNode->getInstList().push_back(Trunc);
+
+ } else {
+
+ /* not necessary to trunc */
+ Trunc = Shift;
+
+ }
+
+ /* this is a trivial case, we can directly check for the byte,
+ * if the byte is not found go to default. if the byte was found
+ * mark the byte as checked. if this was the last byte to check
+ * we can finally execute the block belonging to this case */
+
+ if (smallestSize == 1) {
+
+ uint8_t byte = *(byteSets[smallestIndex].begin());
+
+ /* insert instructions to check whether the value we are switching on is
+ * equal to byte */
+ ICmpInst *Comp =
+ new ICmpInst(ICmpInst::ICMP_EQ, Trunc, ConstantInt::get(ByteType, byte),
+ "byteMatch");
+ NewNode->getInstList().push_back(Comp);
+
+ bytesChecked[smallestIndex] = true;
+ bool allBytesAreChecked = true;
+
+ for (std::vector<bool>::iterator BCI = bytesChecked.begin(),
+ E = bytesChecked.end();
+ BCI != E; ++BCI) {
+
+ if (!*BCI) {
+
+ allBytesAreChecked = false;
+ break;
+
+ }
+
+ }
+
+ // if (std::all_of(bytesChecked.begin(), bytesChecked.end(),
+ // [](bool b) { return b; })) {
+
+ if (allBytesAreChecked) {
+
+ assert(Cases.size() == 1);
+ BranchInst::Create(Cases[0].BB, NewDefault, Comp, NewNode);
+
+ /* we have to update the phi nodes! */
+ for (BasicBlock::iterator I = Cases[0].BB->begin();
+ I != Cases[0].BB->end(); ++I) {
+
+ if (!isa<PHINode>(&*I)) { continue; }
+ PHINode *PN = cast<PHINode>(I);
+
+ /* Only update the first occurrence. */
+ unsigned Idx = 0, E = PN->getNumIncomingValues();
+ for (; Idx != E; ++Idx) {
+
+ if (PN->getIncomingBlock(Idx) == OrigBlock) {
+
+ PN->setIncomingBlock(Idx, NewNode);
+ break;
+
+ }
+
+ }
+
+ }
+
+ } else {
+
+ BasicBlock *BB = switchConvert(Cases, bytesChecked, OrigBlock, NewDefault,
+ Val, level + 1);
+ BranchInst::Create(BB, NewDefault, Comp, NewNode);
+
+ }
+
+ }
+
+ /* there is no byte which we can directly check on, split the tree */
+ else {
+
+ std::vector<uint8_t> byteVector;
+ std::copy(byteSets[smallestIndex].begin(), byteSets[smallestIndex].end(),
+ std::back_inserter(byteVector));
+ std::sort(byteVector.begin(), byteVector.end());
+ uint8_t pivot = byteVector[byteVector.size() / 2];
+
+ /* we already chose to divide the cases based on the value of byte at index
+ * smallestIndex the pivot value determines the threshold for the decision;
+ * if a case value
+ * is smaller at this byte index move it to the LHS vector, otherwise to the
+ * RHS vector */
+
+ CaseVector LHSCases, RHSCases;
+
+ for (CaseExpr &Case : Cases) {
+
+ uint8_t byte = (Case.Val->getZExtValue() >> (smallestIndex * 8)) & 0xFF;
+
+ if (byte < pivot) {
+
+ LHSCases.push_back(Case);
+
+ } else {
+
+ RHSCases.push_back(Case);
+
+ }
+
+ }
+
+ BasicBlock *LBB, *RBB;
+ LBB = switchConvert(LHSCases, bytesChecked, OrigBlock, NewDefault, Val,
+ level + 1);
+ RBB = switchConvert(RHSCases, bytesChecked, OrigBlock, NewDefault, Val,
+ level + 1);
+
+ /* insert instructions to check whether the byte we are switching on is
+ * smaller than the pivot, branching into the matching subtree */
+ ICmpInst *Comp =
+ new ICmpInst(ICmpInst::ICMP_ULT, Trunc,
+ ConstantInt::get(ByteType, pivot), "byteMatch");
+ NewNode->getInstList().push_back(Comp);
+ BranchInst::Create(LBB, RBB, Comp, NewNode);
+
+ }
+
+ return NewNode;
+
+}
+
+/* Rewrite every switch instruction in M whose condition is wider than 8 bit
+ * into a tree of byte-wise comparisons (built by switchConvert), replacing
+ * the original SwitchInst and re-wiring PHI nodes in the old default block.
+ * Returns false when no switch was collected at all, true otherwise (the
+ * return value does not distinguish "rewrote something" from "all collected
+ * switches turned out to be trivial and were skipped"). */
+bool SplitSwitchesTransform::splitSwitches(Module &M) {
+
+#if (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 7)
+  /* NOTE(review): C is not referenced anywhere in this function — presumably
+   * kept for pre-3.7 API calls that need an explicit context; confirm. */
+  LLVMContext &C = M.getContext();
+#endif
+
+  std::vector<SwitchInst *> switches;
+
+  /* iterate over all functions, bbs and instruction and add
+   * all switches to switches vector for later processing */
+  for (auto &F : M) {
+
+    /* honor AFL_LLVM_ALLOWLIST/DENYLIST-style filtering */
+    if (!isInInstrumentList(&F)) continue;
+
+    for (auto &BB : F) {
+
+      SwitchInst *switchInst = nullptr;
+
+      /* a switch can only appear as a block terminator */
+      if ((switchInst = dyn_cast<SwitchInst>(BB.getTerminator()))) {
+
+        /* ignore degenerate switches that only have a default target */
+        if (switchInst->getNumCases() < 1) continue;
+        switches.push_back(switchInst);
+
+      }
+
+    }
+
+  }
+
+  /* nothing to rewrite in this module */
+  if (!switches.size()) return false;
+  /*
+    if (!be_quiet)
+      errs() << "Rewriting " << switches.size() << " switch statements "
+             << "\n";
+  */
+  for (auto &SI : switches) {
+
+    BasicBlock *CurBlock = SI->getParent();
+    BasicBlock *OrigBlock = CurBlock;
+    Function * F = CurBlock->getParent();
+    /* this is the value we are switching on */
+    Value * Val = SI->getCondition();
+    BasicBlock *Default = SI->getDefaultDest();
+    unsigned bitw = Val->getType()->getIntegerBitWidth();
+
+    /*
+      if (!be_quiet)
+        errs() << "switch: " << SI->getNumCases() << " cases " << bitw
+               << " bit\n";
+    */
+
+    /* If there is only the default destination or the condition checks 8 bit or
+     * less, don't bother with the code below. */
+    if (!SI->getNumCases() || bitw <= 8) {
+
+      // if (!be_quiet) errs() << "skip trivial switch..\n";
+      continue;
+
+    }
+
+    /* Create a new, empty default block so that the new hierarchy of
+     * if-then statements go to this and the PHI nodes are happy.
+     * if the default block is set as an unreachable we avoid creating one
+     * because will never be a valid target.
+     * NOTE(review): the code below creates NewDefault unconditionally — the
+     * unreachable-default special case described above is not implemented
+     * here; confirm against switchConvert. */
+    BasicBlock *NewDefault = nullptr;
+    NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault", F, Default);
+    BranchInst::Create(Default, NewDefault);
+
+    /* Prepare cases vector. */
+    CaseVector Cases;
+    for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e;
+         ++i)
+#if LLVM_VERSION_MAJOR < 5
+      /* LLVM < 5 exposes case accessors on the iterator itself */
+      Cases.push_back(CaseExpr(i.getCaseValue(), i.getCaseSuccessor()));
+#else
+      Cases.push_back(CaseExpr(i->getCaseValue(), i->getCaseSuccessor()));
+#endif
+    /* bugfix thanks to pbst
+     * round up bytesChecked (in case getBitWidth() % 8 != 0) */
+    std::vector<bool> bytesChecked((7 + Cases[0].Val->getBitWidth()) / 8,
+                                   false);
+    BasicBlock * SwitchBlock =
+        switchConvert(Cases, bytesChecked, OrigBlock, NewDefault, Val, 0);
+
+    /* Branch to our shiny new if-then stuff... */
+    BranchInst::Create(SwitchBlock, OrigBlock);
+
+    /* We are now done with the switch instruction, delete it. */
+    CurBlock->getInstList().erase(SI);
+
+    /* we have to update the phi nodes!
+     * incoming edges from OrigBlock now arrive via NewDefault instead */
+    for (BasicBlock::iterator I = Default->begin(); I != Default->end(); ++I) {
+
+      if (!isa<PHINode>(&*I)) { continue; }
+      PHINode *PN = cast<PHINode>(I);
+
+      /* Only update the first occurrence. */
+      unsigned Idx = 0, E = PN->getNumIncomingValues();
+      for (; Idx != E; ++Idx) {
+
+        if (PN->getIncomingBlock(Idx) == OrigBlock) {
+
+          PN->setIncomingBlock(Idx, NewDefault);
+          break;
+
+        }
+
+      }
+
+    }
+
+  }
+
+  /* NOTE(review): verifyModule() reports errors via its return value, which
+   * is ignored here — a broken rewrite would go unnoticed at this point. */
+  verifyModule(M);
+  return true;
+
+}
+
+/* Legacy pass entry point: print the banner (or go quiet), run the switch
+ * splitting over the whole module, and sanity-check the result. */
+bool SplitSwitchesTransform::runOnModule(Module &M) {
+
+  /* Show the banner when stderr is an interactive terminal and AFL_QUIET is
+   * unset; AFL_DEBUG forces it regardless. */
+  bool announce =
+      (isatty(2) && getenv("AFL_QUIET") == NULL) || getenv("AFL_DEBUG") != NULL;
+
+  if (announce) {
+
+    printf("Running split-switches-pass by laf.intel@gmail.com\n");
+
+  } else {
+
+    be_quiet = 1;
+
+  }
+
+  splitSwitches(M);
+  verifyModule(M);
+
+  return true;
+
+}
+
+/* Callback invoked by the RegisterStandardPasses hooks below: appends a fresh
+ * SplitSwitchesTransform to the legacy pass manager, which takes ownership. */
+static void registerSplitSwitchesTransPass(const PassManagerBuilder &,
+                                           legacy::PassManagerBase &PM) {
+
+  PM.add(new SplitSwitchesTransform());
+
+}
+
+/* Hook the pass into clang's standard pipeline: once at the end of the
+ * optimizer pipeline for optimized builds... */
+static RegisterStandardPasses RegisterSplitSwitchesTransPass(
+    PassManagerBuilder::EP_OptimizerLast, registerSplitSwitchesTransPass);
+
+/* ...and once for -O0 builds, which skip EP_OptimizerLast entirely. */
+static RegisterStandardPasses RegisterSplitSwitchesTransPass0(
+    PassManagerBuilder::EP_EnabledOnOptLevel0, registerSplitSwitchesTransPass);
+
+#if LLVM_VERSION_MAJOR >= 11
+/* LLVM 11+ also exposes a full-LTO extension point; register there as well so
+ * the pass runs during link-time optimization. */
+static RegisterStandardPasses RegisterSplitSwitchesTransPassLTO(
+    PassManagerBuilder::EP_FullLinkTimeOptimizationLast,
+    registerSplitSwitchesTransPass);
+#endif
+