From 6f49257aaf89251176b8c42c2158a8feb5e8f852 Mon Sep 17 00:00:00 2001 From: David Rubin Date: Mon, 18 Mar 2024 15:22:36 -0700 Subject: [PATCH] build cpython through the script they provide --- .github/workflows/ci.yml | 8 +- .gitignore | 2 +- LICENSE | 674 +++++++++++++++++++++++ build.zig | 83 ++- build.zig.zon | 8 +- demo/print_ast.py | 7 - demo/show_pyc.py | 190 +------ demo/test.py | 14 +- demo/test.pyc | Bin 126 -> 0 bytes src/Manager.zig | 65 +-- src/README.md | 9 +- src/compiler/Marshal.zig | 26 +- src/compiler/opcodes.zig | 2 +- src/frontend/Ast.zig | 258 --------- src/frontend/Compiler.zig | 282 ---------- src/frontend/Parser.zig | 101 ---- src/frontend/Python.zig | 55 ++ src/frontend/cpython.zig | 59 +++ src/frontend/new-compiler/PyObject.zig | 2 - src/frontend/new-compiler/SymTable.zig | 45 -- src/frontend/tokenizer/Tokenizer.zig | 704 ------------------------- src/std-extra/mem.zig | 71 --- src/std-extra/std.zig | 3 - src/vm/Object.zig | 100 +++- src/vm/Vm.zig | 45 +- src/{ => vm}/builtins.zig | 4 +- src/{ => vm}/panic.zig | 2 +- tests/behaviour/add.py | 6 +- tests/behaviour/methods.py | 3 - tests/cases.zig | 15 +- tests/matrix.zig | 39 +- tools/opcode2zig.zig | 19 +- vendor/README.md | 3 + {includes => vendor}/opcode.h | 0 34 files changed, 1131 insertions(+), 1773 deletions(-) create mode 100644 LICENSE delete mode 100644 demo/test.pyc delete mode 100644 src/frontend/Ast.zig delete mode 100644 src/frontend/Compiler.zig delete mode 100644 src/frontend/Parser.zig create mode 100644 src/frontend/Python.zig create mode 100644 src/frontend/cpython.zig delete mode 100644 src/frontend/new-compiler/PyObject.zig delete mode 100644 src/frontend/new-compiler/SymTable.zig delete mode 100644 src/frontend/tokenizer/Tokenizer.zig delete mode 100644 src/std-extra/mem.zig delete mode 100644 src/std-extra/std.zig rename src/{ => vm}/builtins.zig (98%) rename src/{ => vm}/panic.zig (96%) create mode 100644 vendor/README.md rename {includes => vendor}/opcode.h (100%) 
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7103e96..745e69b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,7 +15,7 @@ jobs: - name: Setup Zig uses: goto-bus-stop/setup-zig@v2 with: - version: 0.12.0-dev.3076+6e078883e + version: 0.13.0 - name: Setup Python3.10 uses: actions/setup-python@v2 @@ -27,7 +27,8 @@ jobs: with: path: | ~/.cache/zig - zig-cache + .zig-cache + zig-out/lib key: osmium-${{hashFiles('build.zig.zon')}} - name: Run Tests @@ -38,5 +39,6 @@ jobs: with: path: | ~/.cache/zig - zig-cache + .zig-cache + zig-out/lib key: osmium-${{hashFiles('build.zig.zon')}} \ No newline at end of file diff --git a/.gitignore b/.gitignore index 0012c0f..1c724a5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -zig-cache/ +.zig-cache/ zig-out/ temp/ .vscode/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..e72bfdd --- /dev/null +++ b/LICENSE @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. 
If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. 
Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. 
+ + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. 
Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. 
+ + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. 
+ + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. 
If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. 
+ + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. 
+ + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the 
material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. 
+ + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. 
If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. 
+ + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<https://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<https://www.gnu.org/licenses/why-not-lgpl.html>.
\ No newline at end of file diff --git a/build.zig b/build.zig index d9004fb..2cc831f 100644 --- a/build.zig +++ b/build.zig @@ -2,33 +2,31 @@ const std = @import("std"); const cases = @import("tests/cases.zig"); -var trace: ?bool = false; +var trace: bool = false; var @"enable-bench": ?bool = false; var backend: TraceBackend = .None; pub fn build(b: *std.Build) !void { - const target = b.standardTargetOptions(.{}); + const query = b.standardTargetOptionsQueryOnly(.{}); const optimize = b.standardOptimizeOption(.{}); + // we don't support building cpython to another platform yet + if (!query.isNative()) { + @panic("cross-compilation isn't allowed"); + } + const exe = b.addExecutable(.{ .name = "osmium", - .root_source_file = .{ .path = "src/main.zig" }, - .target = target, + .root_source_file = b.path("src/main.zig"), + .target = b.graph.host, .optimize = optimize, }); - // Deps - const std_extras = b.addModule("std-extras", .{ - .root_source_file = .{ .path = "src/std-extra/std.zig" }, - }); - - exe.root_module.addImport("std-extras", std_extras); - trace = b.option(bool, "trace", \\Enables tracing of the compiler using the default backend (spall) - ); + ) orelse false; - if (trace) |_| { + if (trace) { backend = b.option(TraceBackend, "trace-backend", \\Switch between what backend to use. None is default. 
) orelse backend; @@ -46,8 +44,7 @@ pub fn build(b: *std.Build) !void { exe.use_lld = use_llvm; const exe_options = b.addOptions(); - - exe_options.addOption(bool, "trace", trace orelse false); + exe_options.addOption(bool, "trace", trace); exe_options.addOption(TraceBackend, "backend", backend); exe_options.addOption(std.log.Level, "debug_log", debug_log); exe_options.addOption(usize, "src_file_trimlen", std.fs.path.dirname(std.fs.path.dirname(@src().file).?).?.len); @@ -56,8 +53,16 @@ pub fn build(b: *std.Build) !void { const tracer_dep = b.dependency("tracer", .{}); exe.root_module.addImport("tracer", tracer_dep.module("tracer")); - // exe.linkLibC(); // Needs libc. + const cpython_step = b.step("cpython", "Builds libcpython for the host"); + const cpython_path = try generateLibPython(b, cpython_step, optimize); + + exe.step.dependOn(cpython_step); + exe.linkLibC(); + exe.addObjectFile(cpython_path); + + const cpython_install = b.addInstallFile(cpython_path, "lib/libpython3.10.a"); + b.getInstallStep().dependOn(&cpython_install.step); b.installArtifact(exe); const run_cmd = b.addRunArtifact(exe); @@ -72,7 +77,7 @@ pub fn build(b: *std.Build) !void { // Generate steps const opcode_step = b.step("opcode", "Generate opcodes"); - generateOpCode(b, opcode_step, target); + generateOpCode(b, opcode_step); // Test cases const test_step = b.step("test", "Test Osmium"); @@ -88,19 +93,55 @@ const TraceBackend = enum { fn generateOpCode( b: *std.Build, step: *std.Build.Step, - target: std.Build.ResolvedTarget, ) void { const translator = b.addExecutable(.{ .name = "opcode2zig", - .root_source_file = .{ .path = "./tools/opcode2zig.zig" }, - .target = target, + .root_source_file = b.path("tools/opcode2zig.zig"), + .target = b.graph.host, .optimize = .ReleaseFast, }); const run_cmd = b.addRunArtifact(translator); - run_cmd.addArg("includes/opcode.h"); + run_cmd.addArg("vendor/opcode.h"); run_cmd.addArg("src/compiler/opcodes.zig"); step.dependOn(&run_cmd.step); } + +fn 
generateLibPython( + b: *std.Build, + step: *std.Build.Step, + optimize: std.builtin.OptimizeMode, +) !std.Build.LazyPath { + const source = b.dependency("python", .{}); + + // TODO: cache properly + const maybe_lib_path = try b.build_root.join(b.allocator, &.{ "zig-out", "lib", "libpython3.10.a" }); + const result = if (std.fs.accessAbsolute(maybe_lib_path, .{})) true else |_| false; + if (result) { + return b.path("zig-out/lib/libpython3.10.a"); + } + + const configure_run = std.Build.Step.Run.create(b, "cpython-configure"); + configure_run.setCwd(source.path(".")); + configure_run.addFileArg(source.path("configure")); + configure_run.addArgs(&.{ + "--disable-shared", + if (optimize == .Debug) "" else "--enable-optimizations", + }); + + const make_run = std.Build.Step.Run.create(b, "cpython-make"); + make_run.setCwd(source.path(".")); + make_run.addArgs(&.{ + "make", b.fmt("-j{d}", .{cpu: { + const cpu_set = try std.posix.sched_getaffinity(0); + break :cpu std.posix.CPU_COUNT(cpu_set); + }}), + }); + + make_run.step.dependOn(&configure_run.step); + step.dependOn(&make_run.step); + + return source.path("libpython3.10.a"); +} diff --git a/build.zig.zon b/build.zig.zon index 18cf222..367fe22 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -4,8 +4,12 @@ .paths = .{""}, .dependencies = .{ .tracer = .{ - .url = "https://github.com/Rexicon226/zig-tracer/archive/8c24ad8b1767c874c926417ddccc83981d66aedb.tar.gz", - .hash = "122012ae514d30f7304fd4e9668e6b8a20e0fa5db28c72b32b3f64e5945e5a1cd8a1", + .url = "https://github.com/Rexicon226/zig-tracer/archive/f0c24a3e0ecf232493ab2fadc65f06e48956ccba.tar.gz", + .hash = "1220857ddca6c829d4b68f35e929b32e4eee13c265bc096c634ae8b3d6da7daf34df", + }, + .python = .{ + .url = "https://github.com/python/cpython/archive/333c7dccd87c637d0b15cf81f9bbec28e39664fd.tar.gz", + .hash = "1220c520b358bd5e0bbcfae04b2e9e963ad3c3d7cc3b5ba24a0721e60a0123bfb1ea", }, }, } diff --git a/demo/print_ast.py b/demo/print_ast.py index 788f781..e69de29 100644 
--- a/demo/print_ast.py +++ b/demo/print_ast.py @@ -1,7 +0,0 @@ - -import marshal - -filename = './demo/test.py' -with open(filename, 'r') as f: - bytes = marshal.load(f) - diff --git a/demo/show_pyc.py b/demo/show_pyc.py index 532d79e..86f11ee 100644 --- a/demo/show_pyc.py +++ b/demo/show_pyc.py @@ -1,177 +1,23 @@ -# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0 -# For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt - -""" -Dump the contents of a .pyc file. - -The output will only be correct if run with the same version of Python that -produced the .pyc. - -""" - -import binascii import dis import marshal import struct import sys -import time -import types - - -def show_pyc_file(fname): - f = open(fname, "rb") - magic = f.read(4) - print("magic %s" % (binascii.hexlify(magic))) - read_date_and_size = True - flags = struct.unpack('= 0x80: - line_incr -= 0x100 - line_num += line_incr - if line_num != last_line_num: - yield (byte_num, line_num) - -def flag_words(flags, flag_defs): - words = [] - for word, flag in flag_defs: - if flag & flags: - words.append(word) - return ", ".join(words) - -def show_file(fname): - if fname.endswith('pyc'): - show_pyc_file(fname) - elif fname.endswith('py'): - show_py_file(fname) - else: - print("Odd file:", fname) - -def main(args): - if args[0] == '-c': - show_py_text(" ".join(args[1:]).replace(";", "\n")) - else: - for a in args: - show_file(a) -if __name__ == '__main__': - main(sys.argv[1:]) \ No newline at end of file +def disassemble_pyc(filename): + with open(filename, 'rb') as f: + # Read the magic number and timestamp/header + magic = f.read(4) + timestamp = f.read(4) + if sys.version_info >= (3, 7): + # Python 3.7+ includes the size of the source file in the header + size = f.read(4) + code = marshal.load(f) + dis.dis(code) + +if __name__ == "__main__": + if len(sys.argv) != 2: + print("Usage: python disassemble_pyc.py ") + sys.exit(1) + + pyc_file = sys.argv[1] + 
disassemble_pyc(pyc_file) diff --git a/demo/test.py b/demo/test.py index 2170cd7..d18a9ce 100644 --- a/demo/test.py +++ b/demo/test.py @@ -1,5 +1,11 @@ -def a(x): - return x * 2 +a = 1 +b = 2 +c = a + b -b = a(10) -print(b) \ No newline at end of file +print(c) + +a = 1 +b = a + 2 +a += 1 + +print(a, b) \ No newline at end of file diff --git a/demo/test.pyc b/demo/test.pyc deleted file mode 100644 index c4af8caa3b4f699af36893a7baec1925e2295dbd..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 126 zcmd1j<>g`kf&&jXrV0Y-#~=W#*NvWGG?=ssa @panic("invalid file provided"), + else => |e| return e, + } + }; + const source_file_size = (try source_file.stat()).size; + const source = try source_file.readToEndAlloc(manager.allocator, source_file_size); + + // Parse the code object + const object = try Marshal.load(manager.allocator, source); + + var vm = try Vm.init(); + try vm.run(manager.allocator, object); +} + +pub fn run_file(manager: *Manager, file_name: []const u8) !void { + const source_file = std.fs.cwd().openFile(file_name, .{ .lock = .exclusive }) catch |err| { + switch (err) { + error.FileNotFound => @panic("invalid file provided"), + else => |e| return e, + } + }; + defer source_file.close(); const source_file_size = (try source_file.stat()).size; @@ -42,42 +64,9 @@ pub fn run_pyc(manager: *Manager, file_name: []const u8) !void { 0, ); - // Parse the code object - const object = try Marshal.load(manager.allocator, source); + const pyc = try Python.parse(source, manager.allocator); + const object = try Marshal.load(manager.allocator, pyc); var vm = try Vm.init(); try vm.run(manager.allocator, object); } - -pub fn run_file(manager: *Manager, file_name: []const u8) !void { - _ = std.ChildProcess.run(.{ - .allocator = manager.allocator, - .argv = &.{ - "python3.10", - "-m", - "py_compile", - file_name, - }, - .cwd = ".", - .expand_arg0 = .expand, - }) catch @panic("failed to side-run python"); - - // This outputs to 
__pycache__/file_name.cpython-310.pyc - const output_file_name: []const u8 = name: { - const trimmed_name: []const u8 = file_name[0 .. file_name.len - ".py".len]; - const output_file = std.fs.path.basename(trimmed_name); - - log.debug("Trimmed: {s}", .{trimmed_name}); - - const output_dir = std.fs.path.dirname(trimmed_name) orelse @panic("why in root"); - - const output_pyc = try std.fmt.allocPrint(manager.allocator, "{s}/__pycache__/{s}.cpython-310.pyc", .{ output_dir, output_file }); - - break :name output_pyc; - }; - - log.debug("File: {s}", .{output_file_name}); - - // Run python on that. - try manager.run_pyc(output_file_name); -} diff --git a/src/README.md b/src/README.md index 92e7c92..afddae7 100644 --- a/src/README.md +++ b/src/README.md @@ -3,13 +3,10 @@ ### Directory tree `compiler/` - Compiler source code -basically the area of Osmium that is in charge of converting `.pyc` into a list of seed instructions for the VM. +Compiles an AST into codeobjects of linear bytecode. `frontend/` - Frontend source code -for parsing and structing `.py` files into code objects. +Parsing Python source into an AST `vm/` - VM source code -for executing the instructions. - -`std-extra` - Extra standard library modules -for things I don't feel like PRing into the stdlib, but I still need sometimes. 
+The Python Virtual Machine; runs the input bytecode \ No newline at end of file diff --git a/src/compiler/Marshal.zig b/src/compiler/Marshal.zig index dd8a8f7..efe4e36 100644 --- a/src/compiler/Marshal.zig +++ b/src/compiler/Marshal.zig @@ -106,6 +106,22 @@ fn read_object(marshal: *Marshal) Result { }; }, + .TYPE_FROZENSET => { + const size = marshal.read_long(); + var results = std.ArrayList(Result).init(marshal.allocator); + for (0..@intCast(size.Int)) |_| { + results.append(marshal.read_object()) catch { + @panic("failed to append to frozenset"); + }; + } + result = .{ + .Set = .{ + .set = results.toOwnedSlice() catch @panic("OOM"), + .frozen = true, + }, + }; + }, + .TYPE_INT => result = marshal.read_long(), .TYPE_NONE => result = .{ .None = {} }, @@ -226,6 +242,12 @@ pub const Result = union(enum) { None: void, Bool: bool, + /// Both frozenset and set. + Set: struct { + set: []const Result, + frozen: bool, + }, + CodeObject: *CodeObject, pub fn format( @@ -355,11 +377,11 @@ fn set_version(marshal: *Marshal, magic_bytes: [4]u8) void { } /// Set's a bit at `offset` in `int` -fn testBit(int: anytype, comptime offset: u8) bool { +fn testBit(int: anytype, comptime offset: u3) bool { const mask = @as(u8, 1) << offset; return (int & mask) != 0; } -fn clearBit(int: anytype, comptime offset: u8) @TypeOf(int) { +fn clearBit(int: anytype, comptime offset: u3) @TypeOf(int) { return int & ~(@as(u8, 1) << offset); } diff --git a/src/compiler/opcodes.zig b/src/compiler/opcodes.zig index 18d880d..f014c26 100644 --- a/src/compiler/opcodes.zig +++ b/src/compiler/opcodes.zig @@ -130,5 +130,5 @@ pub const OpCode = enum(u8) { SET_UPDATE = 163, DICT_MERGE = 164, DICT_UPDATE = 165, - // EXCEPT_HANDLER = 257, + // EXCEPT_HANDLER = 257, TODO: why is there 257 in a u8 enum? 
}; diff --git a/src/frontend/Ast.zig b/src/frontend/Ast.zig deleted file mode 100644 index e3bb2c0..0000000 --- a/src/frontend/Ast.zig +++ /dev/null @@ -1,258 +0,0 @@ -// Uses https://docs.python.org/3/library/ast.html - -const std = @import("std"); -const Allocator = std.mem.Allocator; - -const AstError = error{OutOfMemory}; - -pub const Root = union(enum) { - /// A module is the entire contents of a single file. - Module: struct { - body: []Statement, - }, -}; - -pub const Statement = union(enum) { - Break: void, - Continue: void, - Pass: void, - Expr: Expression, - Assign: struct { - targets: []Expression, - value: *Expression, - }, - Return: struct { - value: ?*Expression, - }, - FunctionDef: struct { - name: []const u8, - body: []Expression, - }, - If: struct { - case: *Expression, - body: []Statement, - }, - - pub fn newIf(case: *Expression, body: []Statement) Statement { - return .{ - .If = .{ - .case = case, - .body = body, - }, - }; - } - - pub fn newAssign(targets: []Expression, value: *Expression) Statement { - return .{ - .Assign = .{ - .targets = targets, - .value = value, - }, - }; - } - - pub fn format( - self: Statement, - comptime fmt: []const u8, - _: std.fmt.FormatOptions, - writer: anytype, - ) !void { - std.debug.assert(fmt.len == 0); - - switch (self) { - .Break => try writer.print("BREAK", .{}), - .Continue => try writer.print("CONTINUE", .{}), - .Pass => try writer.print("PASS", .{}), - .Assign => |assign| try writer.print("Assign: {}", .{assign.value}), - .Expr => |expr| try writer.print("{}", .{expr}), - else => try writer.print("TODO: format {s}", .{@tagName(self)}), - } - } -}; - -pub const Expression = union(enum) { - BinOp: struct { - left: *Expression, - op: Op, - right: *Expression, - }, - Compare: struct { - left: *Expression, - op: CompareOp, - right: *Expression, - }, - UnaryOp: struct { - op: UnaryOp, - operand: *Expression, - }, - Call: struct { - func: *Expression, - args: []Expression, - }, - Number: struct { - value: i32, - 
}, - String: struct { - value: []const u8, - }, - Identifier: struct { - name: []const u8, - }, - - True: void, - False: void, - None: void, - - pub fn newCall( - func: *Expression, - args: []Expression, - allocator: Allocator, - ) AstError!*Expression { - const expr = try allocator.create(Expression); - - expr.* = .{ - .Call = .{ - .func = func, - .args = args, - }, - }; - - return expr; - } - - pub fn newIdentifer( - name: []const u8, - allocator: Allocator, - ) AstError!*Expression { - const expr = try allocator.create(Expression); - - expr.* = .{ - .Identifier = .{ - .name = name, - }, - }; - - return expr; - } - - pub fn newNumber( - val: i32, - allocator: Allocator, - ) AstError!*Expression { - const expr = try allocator.create(Expression); - - expr.* = .{ - .Number = .{ - .value = val, - }, - }; - - return expr; - } - - pub fn newBinOp( - lhs: *Expression, - op: Op, - rhs: *Expression, - allocator: Allocator, - ) AstError!*Expression { - const expr = try allocator.create(Expression); - - expr.* = .{ - .BinOp = .{ - .left = lhs, - .op = op, - .right = rhs, - }, - }; - - return expr; - } - - pub fn newCompare( - lhs: *Expression, - op: CompareOp, - rhs: *Expression, - allocator: Allocator, - ) AstError!*Expression { - const expr = try allocator.create(Expression); - - expr.* = .{ - .Compare = .{ - .left = lhs, - .op = op, - .right = rhs, - }, - }; - - return expr; - } - - pub fn newBool( - b: bool, - allocator: Allocator, - ) AstError!*Expression { - const expr = try allocator.create(Expression); - - expr.* = if (b) - .True - else - .False; - - return expr; - } - - pub fn format( - self: Expression, - comptime fmt: []const u8, - _: std.fmt.FormatOptions, - writer: anytype, - ) !void { - std.debug.assert(fmt.len == 0); - - switch (self) { - .BinOp => |bin_op| try writer.print("{{lhs: {}, op: {}, rhs: {}}}", .{ - bin_op.left, - bin_op.op, - bin_op.right, - }), - .Number => |num| try writer.print("Number: {}", .{num.value}), - .Call => |call| try 
writer.print("Call: {{{}, Arg Count: {}}}", .{ call.func, call.args.len }), - .Identifier => |ident| try writer.print("Name: {s}", .{ident.name}), - else => try writer.print("TODO: format {s}", .{@tagName(self)}), - } - } -}; - -pub const UnaryOp = enum { - Invert, - Not, - UAdd, - USub, -}; - -pub const Op = enum { - Add, - Sub, - Mult, - MatMult, - Div, - Mod, - Pow, - LShift, - RShift, - BitOr, - BitXor, - BitAnd, - FloorDiv, -}; - -/// These compare ops and allow for boolean logic. -pub const CompareOp = enum { - Eq, // == - NotEq, // != - Lt, // < - LtE, // <= - Gt, // > - GtE, // >= -}; diff --git a/src/frontend/Compiler.zig b/src/frontend/Compiler.zig deleted file mode 100644 index d2d0117..0000000 --- a/src/frontend/Compiler.zig +++ /dev/null @@ -1,282 +0,0 @@ -const std = @import("std"); -const Ast = @import("Ast.zig"); - -const Allocator = std.mem.Allocator; - -const log = std.log.scoped(.compiler); - -const Compiler = @This(); -const CompilerError = error{OutOfMemory}; - -code_object: CodeObject, -next_label: Label, - -const Label = usize; - -pub fn init(allocator: std.mem.Allocator) Compiler { - return .{ - .code_object = CodeObject.init(allocator), - .next_label = 0, - }; -} - -pub fn deinit(compiler: *Compiler) void { - compiler.code_object.deinit(); -} - -pub fn compile_module(compiler: *Compiler, module: Ast.Root) !void { - try compiler.compile_statements(module.Module.body); -} - -fn compile_statements(compiler: *Compiler, statements: []Ast.Statement) CompilerError!void { - for (statements) |statement| { - try compiler.compile_statement(statement); - } -} - -fn compile_statement(compiler: *Compiler, statement: Ast.Statement) !void { - switch (statement) { - .Continue => try compiler.code_object.emit(.Continue), - .Pass => try compiler.code_object.emit(.Pass), - .Break => try compiler.code_object.emit(.Break), - - .Expr => |expr| { - try compiler.compile_expression(expr); - - // We discard the result. 
- // try compiler.code_object.emit(.Pop); - }, - - .Assign => |assign| { - try compiler.compile_expression(assign.value.*); - - for (assign.targets) |target| { - switch (target) { - .Identifier => |ident| { - const inst = Instruction.storeName(ident.name); - try compiler.code_object.emit(inst); - }, - else => @panic("assinging to non-ident"), - } - } - }, - - else => std.debug.panic("TODO compile_statement: {s}", .{@tagName(statement)}), - } -} - -fn compile_expression(compiler: *Compiler, expression: Ast.Expression) !void { - switch (expression) { - .Number => |number| { - const inst = Instruction.loadConst(.{ .Integer = number.value }); - try compiler.code_object.emit(inst); - }, - - .Identifier => |ident| { - const inst = Instruction.loadName(ident.name); - try compiler.code_object.emit(inst); - }, - - .Call => |call| { - try compiler.compile_expression(call.func.*); - - for (call.args) |arg| { - try compiler.compile_expression(arg); - } - - const inst = Instruction.callFunction(call.args.len); - try compiler.code_object.emit(inst); - }, - - .BinOp => |bin_op| { - try compiler.compile_expression(bin_op.left.*); - try compiler.compile_expression(bin_op.right.*); - - const op: BinaryOp = switch (bin_op.op) { - .Add => .Add, - .Mult => .Multiply, - .Div => .Divide, - .Sub => .Subtract, - else => std.debug.panic("TODO BinOp: {s}", .{@tagName(bin_op.op)}), - }; - - const inst = BinaryOp.newBinaryOp(op); - try compiler.code_object.emit(inst); - }, - - .Compare => |compare| { - try compiler.compile_expression(compare.left.*); - try compiler.compile_expression(compare.right.*); - - const op: CompareOp = switch (compare.op) { - .Eq => .Equal, - .NotEq => .NotEqual, - .Lt => .Less, - .LtE => .LessEqual, - .Gt => .Greater, - .GtE => .GreaterEqual, - }; - - const inst = CompareOp.newCompareOp(op); - try compiler.code_object.emit(inst); - }, - - .True => { - try compiler.code_object.emit(Instruction.loadConst(.{ .Integer = 1 })); - }, - - .False => { - try 
compiler.code_object.emit(Instruction.loadConst(.{ .Integer = 0 })); - }, - - else => std.debug.panic("TODO: {s}", .{@tagName(expression)}), - } -} - -pub const CodeObject = struct { - instructions: std.ArrayList(Instruction), - - pub fn init(allocator: std.mem.Allocator) CodeObject { - return .{ - .instructions = std.ArrayList(Instruction).init(allocator), - }; - } - - pub fn deinit(object: *CodeObject) void { - object.instructions.deinit(); - } - - pub fn emit(object: *CodeObject, instruction: Instruction) !void { - try object.instructions.append(instruction); - } - - pub fn dump(object: *CodeObject) !void { - const log_dump = std.log.scoped(.dump); - - for (object.instructions.items) |inst| { - log_dump.debug("{}", .{inst}); - } - } -}; - -pub const Instruction = union(enum) { - LoadName: struct { name: []const u8 }, - StoreName: struct { name: []const u8 }, - LoadConst: struct { value: Constant }, - - Pop: void, - Pass: void, - Continue: void, - Break: void, - - Jump: struct { target: Label }, - JumpIf: struct { target: Label }, - CallFunction: struct { arg_count: usize }, - - BinaryOperation: struct { op: BinaryOp }, - UnaryOperation: struct { op: UnaryOp }, - CompareOperation: struct { op: CompareOp }, - - ReturnValue: void, - PushBlock: struct { start: Label, end: Label }, - - pub fn loadConst(value: Constant) Instruction { - return .{ - .LoadConst = .{ - .value = value, - }, - }; - } - - pub fn loadName(name: []const u8) Instruction { - return .{ - .LoadName = .{ - .name = name, - }, - }; - } - - pub fn storeName(name: []const u8) Instruction { - return .{ .StoreName = .{ - .name = name, - } }; - } - - pub fn callFunction(arg_count: usize) Instruction { - return .{ - .CallFunction = .{ - .arg_count = arg_count, - }, - }; - } - - pub fn jumpIf(target: Label) Instruction { - return .{ - .JumpIf = .{ - .target = target, - }, - }; - } - - pub fn format( - self: Instruction, - comptime fmt: []const u8, - _: std.fmt.FormatOptions, - writer: anytype, - ) !void { - 
std.debug.assert(fmt.len == 0); - - try writer.print("{s}", .{@tagName(self)}); - } -}; - -pub const Constant = union(enum) { - String: []const u8, - Integer: i32, -}; - -pub const BinaryOp = enum { - Power, - Multiply, - MatrixMultiply, - Divide, - FloorDivide, - Modulo, - Add, - Subtract, - Lshift, - Rshift, - And, - Xor, - Or, - - pub fn newBinaryOp(op: BinaryOp) Instruction { - return .{ - .BinaryOperation = .{ - .op = op, - }, - }; - } -}; - -pub const UnaryOp = enum { - Not, - Minus, -}; - -pub const CompareOp = enum { - Equal, - NotEqual, - Less, - LessEqual, - Greater, - GreaterEqual, - - pub fn newCompareOp(op: CompareOp) Instruction { - return .{ - .CompareOperation = .{ - .op = op, - }, - }; - } -}; diff --git a/src/frontend/Parser.zig b/src/frontend/Parser.zig deleted file mode 100644 index 3b14e89..0000000 --- a/src/frontend/Parser.zig +++ /dev/null @@ -1,101 +0,0 @@ -//! Inputs a list of tokens, and outputs an Ast. - -const std = @import("std"); -const std_extras = @import("std-extras"); - -const assert = std.debug.assert; - -const Allocator = std.mem.Allocator; - -const Tokenizer = @import("tokenizer/Tokenizer.zig"); -const Ast = @import("Ast.zig"); - -const Expression = Ast.Expression; - -const Token = Tokenizer.Token; - -const log = std.log.scoped(.parser); - -const Parser = @This(); - -const ParserError = error{ OutOfMemory, InvalidCharacter, Overflow }; - -index: u32 = 0, -allocator: Allocator, -tokens: []Token, - -pub fn init(allocator: Allocator) !Parser { - return .{ - .allocator = allocator, - .tokens = undefined, - }; -} - -pub fn deinit(_: *Parser) void {} - -pub fn parseFile(parser: *Parser, source: [:0]const u8) !Ast.Root { - var tokenizer = try Tokenizer.init(parser.allocator, source); - defer tokenizer.deinit(); - - // Tokenize the file. 
- parser.tokens = try tokenizer.parse(); - - // File: [statements] ENDMARKER - assert(parser.tokens[parser.tokens.len - 1].kind == .endmarker); - - var statements = std.ArrayList(Ast.Statement).init(parser.allocator); - - parser.index = 0; - while (parser.index < parser.tokens.len) { - const token = parser.currentToken(); - if (token.kind == .endmarker) break; - - // const statement = parser.parseSimpleStmts(token); - // _ = statement; // autofix - - // try statements.append(try parser.statement(token)); - } - - return .{ - .Module = .{ - .body = try statements.toOwnedSlice(), - }, - }; -} - -// General Statements - - - -/// Verifies the current token is kind, and moves forwards one. -/// -/// example: -///``` -/// currentToken().kind == .number; -/// nextToken().kind == .eof; -/// eat(.number); -/// currentToken().kind == .eof; -/// ``` -fn eat(parser: *Parser, kind: Tokenizer.Kind) void { - if (parser.currentToken().kind != kind) { - std.debug.panic("invalid token eaten, found: {}", .{parser.nextToken().kind}); - } - parser.index += 1; - if (parser.index >= parser.tokens.len) { - std.debug.panic("skip caused unexpected eof", .{}); - } -} - -/// Does not advanced, merely peaks -fn currentToken(parser: *Parser) Token { - return parser.tokens[parser.index]; -} - -/// Does not advanced, merely peaks -fn nextToken(parser: *Parser) Token { - return parser.tokens[parser.index + 1]; -} - -fn printCurrent(parser: *Parser) void { - log.debug("Current: {}", .{parser.nextToken().kind}); -} diff --git a/src/frontend/Python.zig b/src/frontend/Python.zig new file mode 100644 index 0000000..5094e90 --- /dev/null +++ b/src/frontend/Python.zig @@ -0,0 +1,55 @@ +//! 
Inputs python source and outputs Bytecode + +pub fn parse(source: [:0]const u8, allocator: std.mem.Allocator) ![]const u8 { + // TODO: this just causes errors for now + // const program = cpython.DecodeLocale(std.mem.span(std.os.argv[0])); + // cpython.SetProgramName(program); + + cpython.Initialize(); + + const compiled = cpython.CompileString(source, ""); + if (null == compiled) { + return error.FailedToCompileString; + } + + const bytecode = cpython.Marshal_WriteObjectToString(compiled); + if (null == bytecode) { + return error.FailedToWriteObjectToString; + } + + const size = cpython.Bytes_Size(bytecode); + const ptr = cpython.Bytes_AsString(bytecode); + if (null == ptr) { + return error.FailedToAsStringCode; + } + + cpython.DecRef(bytecode); + cpython.Finalize(); + + // construct the final pyc bytes + + const pyc_bytes = ptr.?[0..size]; + + const bytes = try allocator.alloc(u8, size + 16); + var fbs = std.io.fixedBufferStream(bytes); + const writer = fbs.writer(); + + try writer.writeInt(u32, MAGIC_NUMBER, .little); + try writer.writeByteNTimes(0, 4); + + const timestamp: u32 = @intCast(std.time.timestamp()); + try writer.writeInt(u32, timestamp, .little); + try writer.writeInt(u32, @intCast(source.len), .little); + try writer.writeAll(pyc_bytes); + + return bytes; +} + +const MAGIC_NUMBER: u32 = 0xa0d0d6f; + +const Python = @This(); +const std = @import("std"); + +const log = std.log.scoped(.python); + +const cpython = @import("cpython.zig"); diff --git a/src/frontend/cpython.zig b/src/frontend/cpython.zig new file mode 100644 index 0000000..c5dc952 --- /dev/null +++ b/src/frontend/cpython.zig @@ -0,0 +1,59 @@ +//! CPython bindings for compiling source code into bytecode. 
+
+const std = @import("std");
+
+extern fn Py_Initialize() void;
+extern fn Py_Finalize() void;
+extern fn Py_DecRef(?*anyopaque) void;
+
+// NOTE(review): CPython types the DecodeLocale result and the SetProgramName arg as `wchar_t*` -- confirm.
+extern fn Py_DecodeLocale([*:0]const u8, *usize) ?[*:0]u8;
+extern fn Py_SetProgramName([*:0]const u8) void;
+
+extern fn Py_CompileString([*:0]const u8, [*:0]const u8, c_int) ?*anyopaque;
+extern fn PyMarshal_WriteObjectToString(?*anyopaque, c_int) ?*anyopaque;
+extern fn PyBytes_Size(?*anyopaque) usize;
+extern fn PyBytes_AsString(?*anyopaque) ?[*:0]u8;
+
+const Py_file_input: c_int = 257;
+const Py_MARSHAL_VERSION: c_int = 4;
+
+pub fn Initialize() void {
+    Py_Initialize();
+}
+
+pub fn Finalize() void {
+    Py_Finalize();
+}
+
+pub fn DecRef(code: ?*anyopaque) void {
+    Py_DecRef(code);
+}
+
+/// Decodes `argv`, panicking on failure; the reported length excludes the terminator, which sits at `len`.
+pub fn DecodeLocale(argv: [:0]const u8) [:0]const u8 {
+    var len: usize = undefined;
+    const program = Py_DecodeLocale(argv.ptr, &len) orelse
+        std.debug.panic("Fatal error: cannot decode {s}", .{argv});
+    return program[0..len :0];
+}
+
+pub fn SetProgramName(name: [:0]const u8) void {
+    Py_SetProgramName(name.ptr);
+}
+
+pub fn CompileString(source: [:0]const u8, filename: [:0]const u8) ?*anyopaque {
+    return Py_CompileString(source.ptr, filename.ptr, Py_file_input);
+}
+
+pub fn Marshal_WriteObjectToString(code: ?*anyopaque) ?*anyopaque {
+    return PyMarshal_WriteObjectToString(code, Py_MARSHAL_VERSION);
+}
+
+pub fn Bytes_Size(code: ?*anyopaque) usize {
+    return PyBytes_Size(code);
+}
+
+pub fn Bytes_AsString(code: ?*anyopaque) ?[*:0]u8 {
+    return PyBytes_AsString(code);
+}
diff --git a/src/frontend/new-compiler/PyObject.zig b/src/frontend/new-compiler/PyObject.zig
deleted file mode 100644
index cd99b54..0000000
--- a/src/frontend/new-compiler/PyObject.zig
+++ /dev/null
@@ -1,2 +0,0 @@
-//! 
The PyObject definition in Zig - diff --git a/src/frontend/new-compiler/SymTable.zig b/src/frontend/new-compiler/SymTable.zig deleted file mode 100644 index 0abc8d5..0000000 --- a/src/frontend/new-compiler/SymTable.zig +++ /dev/null @@ -1,45 +0,0 @@ -//! The Symbol Table - - -const std = @import("std"); -const PyObject = @import("PyObject.zig"); - -/// The file currently being compiled. -filename: PyObject, -blocks: []PyObject, - -/// Current symbol table entry -current: SymTableEntry, - -/// Symbol table entry for module -top: SymTableEntry, - -/// The number of blocks used. -num_blocks: u32, - -/// The name of the current class or NULL -private: ?PyObject, - -/// Current recursion depth -recursion_depth: u32, -/// Recursion limit -recursion_limit: u32, - - -pub const SymTableEntry = struct { - /// Name of the current block (string) - name: PyObject, - - /// Child Blocks - children: []PyObject, - - /// Location of global and nonlocal statements - directives: []PyObject, - - /// Is the block nested? - nested: bool, - /// Are there free variables? - free: bool = true, - /// Do any child blocks have free variables? - child_free: bool = true, -}; \ No newline at end of file diff --git a/src/frontend/tokenizer/Tokenizer.zig b/src/frontend/tokenizer/Tokenizer.zig deleted file mode 100644 index ce43fd8..0000000 --- a/src/frontend/tokenizer/Tokenizer.zig +++ /dev/null @@ -1,704 +0,0 @@ -//! -//! Tools for parsing Python 3 source code. -//! - -const std = @import("std"); -const testing = std.testing; - -const Tokenizer = @This(); - -const log = std.log.scoped(.tokenizer); - -const TokenizerError = error{ OutOfMemory, UnexpectedEOF, UnexpectedToken }; - -allocator: std.mem.Allocator, -tokens: Tokens, -source: [:0]const u8, -offset: usize = 0, -line: usize = 0, -column: usize = 0, - -/// The kind of token. 
-pub const Kind = enum { - // General - number, - identifier, - - // Whitespace - tab, - newline, - - // Keywords - keyword_if, - keyword_else, - keyword_elif, - keyword_while, - keyword_for, - keyword_in, - keyword_return, - keyword_break, - keyword_continue, - keyword_pass, - keyword_def, - keyword_class, - keyword_as, - keyword_with, - keyword_assert, - keyword_del, - keyword_except, - keyword_finally, - keyword_from, - keyword_global, - keyword_import, - keyword_lambda, - keyword_nonlocal, - keyword_raise, - keyword_try, - keyword_yield, - keyword_and, - keyword_or, - keyword_not, - keyword_is, - - // Operators - op_plus, - op_minus, - op_multiply, - op_divide, - - op_increment, - op_plus_equal, - op_equal, - op_assign, - - // Symbols - lparen, - rparen, - lbracket, - rbracket, - colon, - comma, - dot, - semicolon, - at, - - // Extra - endmarker, -}; - -/// A token aka slice of data inside the source. -pub const Data = []const u8; - -/// The token kind and data that will be used inside the MultiArrayList. -pub const Token = struct { - kind: Kind, - data: Data, - - pub fn eql(lhs: Token, rhs: Token) bool { - return lhs.kind == rhs.kind; - } -}; - -/// The list of tokens. -pub const Tokens = std.MultiArrayList(Token); - -/// The index of a token inside the MultiArrayList. -pub const TokenIndex = usize; - -/// Each keyword and its corresponding token kind. 
-pub const KeywordMap = std.ComptimeStringMap(Kind, .{ - .{ "if", .keyword_if }, - .{ "else", .keyword_else }, - .{ "elif", .keyword_elif }, - .{ "while", .keyword_while }, - .{ "for", .keyword_for }, - .{ "in", .keyword_in }, - .{ "return", .keyword_return }, - .{ "break", .keyword_break }, - .{ "continue", .keyword_continue }, - .{ "pass", .keyword_pass }, - .{ "def", .keyword_def }, - .{ "class", .keyword_class }, - .{ "as", .keyword_as }, - .{ "with", .keyword_with }, - .{ "assert", .keyword_assert }, - .{ "del", .keyword_del }, - .{ "except", .keyword_except }, - .{ "finally", .keyword_finally }, - .{ "from", .keyword_from }, - .{ "global", .keyword_global }, - .{ "import", .keyword_import }, - .{ "lambda", .keyword_lambda }, - .{ "nonlocal", .keyword_nonlocal }, - .{ "raise", .keyword_raise }, - .{ "try", .keyword_try }, - .{ "yield", .keyword_yield }, - .{ "and", .keyword_and }, - .{ "or", .keyword_or }, - .{ "not", .keyword_not }, - .{ "is", .keyword_is }, -}); - -/// Each operators starting symbol. -pub const OperatorStartMap = std.ComptimeStringMap(void, .{ - .{ "+", void }, - .{ "-", void }, - .{ "*", void }, - .{ "/", void }, - .{ "%", void }, - .{ "&", void }, - .{ "|", void }, - .{ "^", void }, - .{ "~", void }, - .{ "<", void }, - .{ ">", void }, - .{ "=", void }, - .{ "!", void }, -}); - -/// Each symbol and its corresponding token kind. -pub const SymbolMap = std.ComptimeStringMap(Kind, .{ - .{ "(", .lparen }, - .{ ")", .rparen }, - .{ "[", .lbracket }, - .{ "]", .rbracket }, - .{ ":", .colon }, - .{ ",", .comma }, - .{ ".", .dot }, - .{ ";", .semicolon }, - .{ "@", .at }, -}); - -// ================================================================= -// Public functions -// ================================================================= - -/// Creates a new tokenizer that will tokenize the given source. 
-pub fn init(allocator: std.mem.Allocator, source: [:0]const u8) !Tokenizer { - return .{ - .allocator = allocator, - .source = source, - .tokens = Tokens{}, - }; -} - -/// Deinitializes the tokenizer. -pub fn deinit(tokenizer: *Tokenizer) void { - tokenizer.tokens.deinit(tokenizer.allocator); -} - -/// Parses the input, and returns a list of tokens -pub fn parse(tokenizer: *Tokenizer) ![]Token { - var tokens = std.ArrayList(Token).init(tokenizer.allocator); - - while (true) { - const token = tokenizer.nextToken() catch |e| { - if (e == error.UnexpectedEOF) break; - return e; - }; - try tokens.append(token); - } - - try tokens.append(.{ .data = undefined, .kind = .endmarker }); - - for (tokens.items) |token| { - log.debug("Token: {}", .{token.kind}); - } - - return try tokens.toOwnedSlice(); -} - -pub fn nextToken(self: *Tokenizer) TokenizerError!Token { - const token_id = try self.nextTokenIndex(); - return self.tokens.get(token_id); -} - -/// Parses the next token index in the source. -pub fn nextTokenIndex(self: *Tokenizer) TokenizerError!TokenIndex { - // TODO(SeedyROM): This is bad, I should feel bad. - if (self.checkEOF()) { - return error.UnexpectedEOF; - } - - // Ignore spaces for now. - if (self.source[self.offset] == ' ') { - self.offset += 1; - return self.nextTokenIndex(); - } - - // If we're at a whitespace, we're parsing a whitespace - if (std.ascii.isWhitespace(self.source[self.offset])) { - return self.whitespace(); - } - - // If we're at a digit, we're parsing a number - if (std.ascii.isDigit(self.source[self.offset]) or self.source[self.offset] == '.') { - return self.number(); - } - - // If we're at a letter, we're parsing an identifier or keyword - // TODO(SeedyROM): This isAlphabetic needs to include _ - if (std.ascii.isAlphabetic(self.source[self.offset])) { - const ident_id = try self.identifier(); - const token_data = self.tokens.items(.data); - - // If the identifier is a keyword... 
- if (KeywordMap.get(token_data[ident_id])) |keyword| { - var token = self.tokens.get(ident_id); - token.kind = keyword; - self.tokens.set(ident_id, token); - } - - return ident_id; - } - - // Parse symbols - if (SymbolMap.has(&.{self.source[self.offset]}) == true) { - return self.symbol(); - } - - // Parse operators - if (OperatorStartMap.has(&.{self.source[self.offset]}) == true) { - return self.operator(); - } - - std.log.err("Unexpected token '{c}' at ({d}:{d})\n", .{ self.source[self.offset], self.line, self.column }); - return error.UnexpectedToken; -} - -fn lastToken(self: *Tokenizer) TokenIndex { - return self.tokens.len - 1; -} - -pub fn checkEOF(self: *Tokenizer) bool { - return self.offset >= self.source.len; -} - -fn advance(self: *Tokenizer) void { - if (self.source[self.offset] == '\n') { - self.line += 1; - self.column = 0; - } else { - self.column += 1; - } - self.offset += 1; -} - -// ================================================================= -// Parsing functions -// ================================================================= - -/// Parses a whitespace token. -fn whitespace(self: *Tokenizer) !TokenIndex { - // Parse the whitespace - const value = self.source[self.offset]; - const kind = switch (value) { - '\t' => Kind.tab, - '\n' => Kind.newline, - else => return error.UnexpectedToken, - }; - - try self.tokens.append(self.allocator, Token{ .kind = kind, .data = self.source[self.offset .. self.offset + 1] }); - self.advance(); - return self.lastToken(); -} - -/// Parses a number. -fn number(self: *Tokenizer) !TokenIndex { - // Parse the number - const start = self.offset; - - // If the number starts with a dot we're implying a 0 - if (self.source[self.offset] == '.') { - self.advance(); - } - - outer: while (std.ascii.isDigit(self.source[self.offset])) { - self.advance(); - - // If we're starting with 0, we might be parsing a binary, octal, or hex number - if (self.source[start] == '0') { - // If we're parsing hex. 
- if (self.source[self.offset] == 'x' or self.source[self.offset] == 'X') { - self.advance(); - - while (std.ascii.isHex(self.source[self.offset])) { - self.advance(); - - // If we're at the end of the source, break - if (self.checkEOF()) break :outer; - } - } - - // If we're parsing binary. - if (self.source[self.offset] == 'b' or self.source[self.offset] == 'B') { - self.advance(); - - while (self.source[self.offset] == '0' or self.source[self.offset] == '1') { - self.advance(); - - // If we're at the end of the source, break - if (self.checkEOF()) break :outer; - } - } - - // If we're parsing octal. - if (self.source[self.offset] == 'o' or self.source[self.offset] == 'O') { - self.advance(); - - while (self.source[self.offset] >= '0' and self.source[self.offset] <= '7') { - self.advance(); - - // If we're at the end of the source, break - if (self.checkEOF()) break :outer; - } - } - } - - // If we're at the end of the source, break - if (self.checkEOF()) break; - - // If we get a dot, we're parsing a fractional number, just keep going - if (self.source[self.offset] == '.') { - self.advance(); - if (self.checkEOF()) break; - } - } - - // Create the token - const data = self.source[start..self.offset]; - const token = Token{ .kind = Kind.number, .data = data }; - try self.tokens.append(self.allocator, token); - return self.lastToken(); -} - -/// Parses an identifier. -fn identifier(self: *Tokenizer) !TokenIndex { - // Parse the identifier - const start = self.offset; - while (std.ascii.isAlphabetic(self.source[self.offset])) { - self.advance(); - - // If we're at the end of the source, break - if (self.checkEOF()) break; - } - const data = self.source[start..self.offset]; - const token = Token{ .kind = Kind.identifier, .data = data }; - try self.tokens.append(self.allocator, token); - return self.lastToken(); -} - -/// Parses an operator. -// TODO(SeedyROM): Clean this up. 
-fn operator(self: *Tokenizer) !TokenIndex { - const start = self.offset; - - if (self.source[start] == '=') { - self.advance(); - if (self.checkEOF()) { - const data = self.source[start..self.offset]; - const token = Token{ .kind = Kind.op_assign, .data = data }; - try self.tokens.append(self.allocator, token); - return self.lastToken(); - } - - if (self.source[self.offset] == '=') { - self.advance(); - const data = self.source[start..self.offset]; - const token = Token{ .kind = Kind.op_equal, .data = data }; - try self.tokens.append(self.allocator, token); - return self.lastToken(); - } else { - const data = self.source[start..self.offset]; - const token = Token{ .kind = Kind.op_assign, .data = data }; - try self.tokens.append(self.allocator, token); - return self.lastToken(); - } - } - - if (self.source[start] == '+') { - self.advance(); - if (self.checkEOF()) { - const data = self.source[start..self.offset]; - const token = Token{ .kind = Kind.op_plus, .data = data }; - try self.tokens.append(self.allocator, token); - return self.lastToken(); - } - - if (self.source[self.offset] == '=') { - self.advance(); - const data = self.source[start..self.offset]; - const token = Token{ .kind = Kind.op_plus_equal, .data = data }; - try self.tokens.append(self.allocator, token); - return self.lastToken(); - } else if (self.source[self.offset] == '+') { - self.advance(); - const data = self.source[start..self.offset]; - const token = Token{ .kind = Kind.op_increment, .data = data }; - try self.tokens.append(self.allocator, token); - return self.lastToken(); - } - - const data = self.source[start..self.offset]; - const token = Token{ .kind = Kind.op_plus, .data = data }; - try self.tokens.append(self.allocator, token); - return self.lastToken(); - } - - if (self.source[start] == '-') { - self.advance(); - if (self.checkEOF()) { - const data = self.source[start..self.offset]; - const token = Token{ .kind = Kind.op_minus, .data = data }; - try 
self.tokens.append(self.allocator, token); - return self.lastToken(); - } - - const data = self.source[start..self.offset]; - const token = Token{ .kind = Kind.op_minus, .data = data }; - try self.tokens.append(self.allocator, token); - return self.lastToken(); - } - - if (self.source[start] == '*') { - self.advance(); - if (self.checkEOF()) { - const data = self.source[start..self.offset]; - const token = Token{ .kind = Kind.op_multiply, .data = data }; - try self.tokens.append(self.allocator, token); - return self.lastToken(); - } - - const data = self.source[start..self.offset]; - const token = Token{ .kind = Kind.op_multiply, .data = data }; - try self.tokens.append(self.allocator, token); - return self.lastToken(); - } - - if (self.source[start] == '/') { - self.advance(); - if (self.checkEOF()) { - const data = self.source[start..self.offset]; - const token = Token{ .kind = Kind.op_divide, .data = data }; - try self.tokens.append(self.allocator, token); - return self.lastToken(); - } - - const data = self.source[start..self.offset]; - const token = Token{ .kind = Kind.op_divide, .data = data }; - try self.tokens.append(self.allocator, token); - return self.lastToken(); - } - - return error.UnexpectedToken; -} - -fn symbol(self: *Tokenizer) !TokenIndex { - if (SymbolMap.get(&.{self.source[self.offset]})) |kind| { - const data = self.source[self.offset .. 
self.offset + 1]; - const token = Token{ .kind = kind, .data = data }; - try self.tokens.append(self.allocator, token); - self.advance(); - return self.lastToken(); - } - - return error.UnexpectedToken; -} - -// ================================================================= - -fn testTokenizer(allocator: std.mem.Allocator, source: []const u8, expected: []const Token) !void { - var tokenizer = try init(allocator, source); - defer tokenizer.deinit(); - - const tokens = try tokenizer.parse(); - defer allocator.free(tokens); - - try testing.expectEqual(expected.len, tokens.len); - for (0..tokens.len) |i| { - const token = tokens[i]; - const expected_token = expected[i]; - - try testing.expectEqual(expected_token.kind, token.kind); - try testing.expectEqualStrings(expected_token.data, token.data); - } -} - -test "whole number" { - const source = "123"; - try testTokenizer( - testing.allocator, - source, - &.{ - .{ .kind = Kind.number, .data = source }, - }, - ); -} - -test "fractional number" { - const source = "112355.123"; - - try testTokenizer( - testing.allocator, - source, - &.{ - .{ .kind = Kind.number, .data = source }, - }, - ); -} - -test "fractional number without whole part" { - const source = ".123"; - - try testTokenizer( - testing.allocator, - source, - &.{ - .{ .kind = Kind.number, .data = source }, - }, - ); -} - -test "fractional number without fractional part" { - const source = "123."; - - try testTokenizer( - testing.allocator, - source, - &.{ - .{ .kind = Kind.number, .data = source }, - }, - ); -} - -test "hex number" { - const source = "0x123abc"; - - try testTokenizer( - testing.allocator, - source, - &.{ - .{ .kind = Kind.number, .data = source }, - }, - ); -} - -test "binary number" { - const source = "0b1010101"; - - try testTokenizer( - testing.allocator, - source, - &.{ - .{ .kind = Kind.number, .data = source }, - }, - ); -} - -test "octal number" { - const source = "0o1234567"; - - try testTokenizer( - testing.allocator, - source, - &.{ 
- .{ .kind = Kind.number, .data = source }, - }, - ); -} - -test "identifier" { - const source = "hello"; - - try testTokenizer( - testing.allocator, - source, - &.{ - .{ .kind = Kind.identifier, .data = source }, - }, - ); -} - -test "tab whitespace" { - const source = "\t"; - - try testTokenizer( - testing.allocator, - source, - &.{ - .{ .kind = Kind.tab, .data = source }, - }, - ); -} - -test "newline whitespace" { - const source = "\n"; - - try testTokenizer( - testing.allocator, - source, - &.{ - .{ .kind = Kind.newline, .data = source }, - }, - ); -} - -test "keywords" { - for (KeywordMap.kvs) |kv| { - const source = kv.key; - const kind = kv.value; - - try testTokenizer( - testing.allocator, - source, - &.{ - .{ .kind = kind, .data = source }, - }, - ); - } -} - -test "operators" { - const operators: []const Token = &.{ - .{ .kind = Kind.op_plus, .data = "+" }, - .{ .kind = Kind.op_plus_equal, .data = "+=" }, - .{ .kind = Kind.op_increment, .data = "++" }, - }; - - for (operators) |op| { - const source = op.data; - - try testTokenizer( - testing.allocator, - source, - &.{ - op, - }, - ); - } -} - -// TODO(Sinon): Finish this test case -test "simple expression" { - const source = - \\if x == 5: - \\ x+=15 - \\ print(x) - ; - - var tokenizer = try init(testing.allocator, source); - defer tokenizer.deinit(); - - while (!tokenizer.checkEOF()) { - const token_id = tokenizer.nextToken() catch |err| { - switch (err) { - error.UnexpectedEOF => break, - else => { - std.log.err("Error: {any}", .{err}); - return err; - }, - } - }; - _ = token_id; - } -} diff --git a/src/std-extra/mem.zig b/src/std-extra/mem.zig deleted file mode 100644 index c03995e..0000000 --- a/src/std-extra/mem.zig +++ /dev/null @@ -1,71 +0,0 @@ -const std = @import("std"); - -const DelimiterType = enum { sequence, any, scalar, context }; - -pub fn TokenIteratorContext( - comptime T: type, - comptime equalFn: fn (lhs: T, rhs: T) bool, -) type { - return struct { - buffer: []const T, - delimiter: T, 
- index: usize, - - const Self = @This(); - - /// Returns a slice of the current token, or null if tokenization is - /// complete, and advances to the next token. - pub fn next(self: *Self) ?[]const T { - const result = self.peek() orelse return null; - self.index += result.len; - return result; - } - - /// Returns a slice of the current token, or null if tokenization is - /// complete. Does not advance to the next token. - pub fn peek(self: *Self) ?[]const T { - // move to beginning of token - while (self.index < self.buffer.len and self.isDelimiter(self.index)) : (self.index += 1) {} - const start = self.index; - if (start == self.buffer.len) { - return null; - } - - // move to end of token - var end = start; - while (end < self.buffer.len and !self.isDelimiter(end)) : (end += 1) {} - - return self.buffer[start..end]; - } - - /// Returns a slice of the remaining bytes. Does not affect iterator state. - pub fn rest(self: Self) []const T { - // move to beginning of token - var index: usize = self.index; - while (index < self.buffer.len and self.isDelimiter(index)) : (index += 1) {} - return self.buffer[index..]; - } - - /// Resets the iterator to the initial token. - pub fn reset(self: *Self) void { - self.index = 0; - } - - fn isDelimiter(self: Self, index: usize) bool { - return equalFn(self.buffer[index], self.delimiter); - } - }; -} - -pub fn tokenizeScalar( - comptime T: type, - buffer: []const T, - delimiters: T, - comptime equalFn: fn (lhs: T, rhs: T) bool, -) TokenIteratorContext(T, equalFn) { - return .{ - .index = 0, - .buffer = buffer, - .delimiter = delimiters, - }; -} diff --git a/src/std-extra/std.zig b/src/std-extra/std.zig deleted file mode 100644 index e11b1ec..0000000 --- a/src/std-extra/std.zig +++ /dev/null @@ -1,3 +0,0 @@ -//! 
An standard library of extra functions I need - -pub const mem = @import("mem.zig"); diff --git a/src/vm/Object.zig b/src/vm/Object.zig index e1ea4d7..a43b4e8 100644 --- a/src/vm/Object.zig +++ b/src/vm/Object.zig @@ -1,6 +1,6 @@ const std = @import("std"); const Vm = @import("Vm.zig"); -const builtins = @import("../builtins.zig"); +const builtins = @import("builtins.zig"); const Co = @import("../compiler/CodeObject.zig"); const BigIntConst = std.math.big.int.Const; @@ -28,7 +28,7 @@ payload: ?(*align(blk: { }) anyopaque), pub const Tag = enum(usize) { - const first_payload = @intFromEnum(Tag.none) + 1; + const first_payload = @intFromEnum(Tag.int); // Note: this is the literal None type. none, @@ -40,6 +40,7 @@ pub const Tag = enum(usize) { boolean, tuple, list, + set, /// A builtin Zig defined function. zig_function, @@ -60,6 +61,8 @@ pub const Tag = enum(usize) { .tuple => Payload.Tuple, .list => Payload.List, + .set => Payload.Set, + .zig_function => Payload.ZigFunc, .codeobject => Payload.CodeObject, .function => Payload.PythonFunction, @@ -95,31 +98,47 @@ pub fn get(object: *const Object, comptime t: Tag) *Data(t) { } pub fn getMemberFunction(object: *const Object, name: []const u8, vm: *Vm) error{OutOfMemory}!?Object { - const member_list = - switch (object.tag) { + const member_list: Payload.MemberFuncTy = switch (object.tag) { .list => Payload.List.MemberFns, + .set => Payload.Set.MemberFns, else => std.debug.panic("{s} has no member functions", .{@tagName(object.tag)}), }; - - inline for (member_list) |func| { - if (std.mem.eql(u8, func[0], name)) { - const func_ptr = func[1]; - + for (member_list) |func| { + if (std.mem.eql(u8, func.name, name)) { + const func_ptr = func.func; return try Object.create(.zig_function, vm.allocator, func_ptr); } } - return null; } +pub fn callMemberFunction( + object: *const Object, + vm: *Vm, + name: []const u8, + args: []Object, + kw: ?builtins.KW_Type, +) !void { + const func = try object.getMemberFunction(name, vm) 
orelse return error.NotAMemberFunction; + const func_ptr = func.get(.zig_function); + const self_args = try std.mem.concat(vm.allocator, Object, &.{ &.{object.*}, args }); + try @call(.auto, func_ptr.*, .{ vm, self_args, kw }); +} + pub const Payload = union(enum) { value: Value, zig_func: ZigFunc, tuple: Tuple, + set: Set, list: List, codeobject: CodeObject, function: PythonFunction, + pub const MemberFuncTy = []const struct { + name: []const u8, + func: *const builtins.func_proto, + }; + pub const Value = union(enum) { int: BigIntManaged, string: []const u8, @@ -133,9 +152,8 @@ pub const Payload = union(enum) { pub const List = struct { list: std.ArrayListUnmanaged(Object), - /// First arg is the List itself. - pub const MemberFns = &.{ - .{ "append", append }, + pub const MemberFns: MemberFuncTy = &.{ + .{ .name = "append", .func = append }, }; fn append(vm: *Vm, args: []Object, kw: ?builtins.KW_Type) !void { @@ -159,6 +177,47 @@ pub const Payload = union(enum) { name: []const u8, co: *Co, }; + + pub const Set = struct { + set: std.AutoHashMapUnmanaged(Object, void), + frozen: bool, + + pub const MemberFns: MemberFuncTy = &.{ + // zig fmt: off + .{ .name = "update", .func = update }, + .{ .name = "add" , .func = add }, + // zig fmt: on + }; + + /// Merges the set in `args[1]` into the receiver `args[0]`; any other argument type panics. + fn update(vm: *Vm, args: []Object, kw: ?builtins.KW_Type) !void { + if (null != kw) @panic("set.update() has no kw args"); + + if (args.len != 2) std.debug.panic("set.update() takes exactly 1 argument ({d} given)", .{args.len - 1}); + + const self = args[0].get(.set); + const arg = args[1]; + + switch (arg.tag) { + .set => { + const arg_set = args[1].get(.set).set; + var obj_iter = arg_set.keyIterator(); + while (obj_iter.next()) |obj| { + try self.set.put(vm.allocator, obj.*, {}); + } + }, + else => std.debug.panic("can't append {s} to set", .{@tagName(arg.tag)}), + } + } + + /// Appends an item. 
+ fn add(vm: *Vm, args: []Object, kw: ?builtins.KW_Type) !void { + if (null != kw) @panic("set.add() has no kw args"); + + if (args.len != 2) std.debug.panic("set.add() takes exactly 1 argument ({d} given)", .{args.len - 1}); + _ = vm; + } + }; }; pub fn format( @@ -210,6 +269,21 @@ pub fn format( try writer.writeAll(")"); }, + .set => { + const set = object.get(.set).set; + var iter = set.keyIterator(); + const set_len = set.count(); + + try writer.writeAll("{"); + + var i: u32 = 0; + while (iter.next()) |obj| : (i += 1){ + try writer.print("{}", .{obj}); + if (i < set_len - 1) try writer.writeAll(", "); + } + + try writer.writeAll("}"); + }, else => try writer.print("TODO: Object.format '{s}'", .{@tagName(object.tag)}), } diff --git a/src/vm/Vm.zig b/src/vm/Vm.zig index 57197af..50e9fd8 100644 --- a/src/vm/Vm.zig +++ b/src/vm/Vm.zig @@ -16,7 +16,7 @@ const Marshal = @import("../compiler/Marshal.zig"); const Object = @import("Object.zig"); const Vm = @This(); -const builtins = @import("../builtins.zig"); +const builtins = @import("builtins.zig"); const log = std.log.scoped(.vm); @@ -124,11 +124,14 @@ fn exec(vm: *Vm, inst: Instruction) !void { .LOAD_FAST => try vm.execLoadFast(inst), .BUILD_LIST => try vm.execBuildList(inst), + .BUILD_SET => try vm.execBuildSet(inst), .STORE_NAME => try vm.execStoreName(inst), .STORE_SUBSCR => try vm.execStoreSubScr(), .STORE_FAST => try vm.execStoreFast(inst), + .SET_UPDATE => try vm.execSetUpdate(inst), + .RETURN_VALUE => try vm.execReturnValue(), .POP_TOP => try vm.execPopTop(), @@ -213,10 +216,23 @@ fn execReturnValue(vm: *Vm) !void { } fn execBuildList(vm: *Vm, inst: Instruction) !void { + const count = inst.extra; + + if (count == 0) return; + _ = vm; + + @panic("TODO: execBuildList count != 0"); +} + +fn execBuildSet(vm: *Vm, inst: Instruction) !void { const objects = try vm.popNObjects(inst.extra); - const list = std.ArrayListUnmanaged(Object).fromOwnedSlice(objects); + var list = std.AutoHashMapUnmanaged(Object, void){}; 
+ + for (objects) |object| { + try list.put(vm.allocator, object, {}); + } - const val = try Object.create(.list, vm.allocator, .{ .list = list }); + const val = try Object.create(.set, vm.allocator, .{ .set = list, .frozen = false }); try vm.stack.append(vm.allocator, val); } @@ -386,6 +402,17 @@ fn execStoreFast(vm: *Vm, inst: Instruction) !void { vm.current_co.varnames[var_num] = tos; } +fn execSetUpdate(vm: *Vm, inst: Instruction) !void { + const seq = vm.stack.pop(); + const target = vm.stack.items[vm.stack.items.len - inst.extra]; + try target.callMemberFunction( + vm, + "update", + try vm.allocator.dupe(Object, &.{seq}), + null, + ); +} + fn execPopJump(vm: *Vm, inst: Instruction, case: bool) !void { const tos = vm.stack.pop(); @@ -457,6 +484,18 @@ pub fn loadConst(allocator: Allocator, inst: Marshal.Result) !Object { .CodeObject => |co| { return Object.create(.codeobject, allocator, .{ .co = co }); }, + .Set => |set_struct| { + const set = set_struct.set; + + var items = std.AutoHashMapUnmanaged(Object, void){}; + for (set) |elem| { + try items.put(allocator, try loadConst(allocator, elem), {}); + } + return Object.create(.set, allocator, .{ + .set = items, + .frozen = set_struct.frozen, + }); + }, else => std.debug.panic("TODO: loadConst {s}", .{@tagName(inst)}), } } diff --git a/src/builtins.zig b/src/vm/builtins.zig similarity index 98% rename from src/builtins.zig rename to src/vm/builtins.zig index 8eba331..d38d8c5 100644 --- a/src/builtins.zig +++ b/src/vm/builtins.zig @@ -4,9 +4,9 @@ const std = @import("std"); const tracer = @import("tracer"); -const Object = @import("vm/Object.zig"); +const Object = @import("Object.zig"); -const Vm = @import("vm/Vm.zig"); +const Vm = @import("Vm.zig"); const fatal = @import("panic.zig").fatal; pub const KW_Type = std.StringHashMap(Object); diff --git a/src/panic.zig b/src/vm/panic.zig similarity index 96% rename from src/panic.zig rename to src/vm/panic.zig index f879b3a..a8437b6 100644 --- a/src/panic.zig +++ 
b/src/vm/panic.zig @@ -15,5 +15,5 @@ pub fn fatal(comptime format: []const u8, args: anytype) noreturn { stderr.writeAll(msg) catch |err| @panic(@errorName(err)); - std.os.exit(1); + std.posix.exit(1); } diff --git a/tests/behaviour/add.py b/tests/behaviour/add.py index 4754eba..46bfafa 100644 --- a/tests/behaviour/add.py +++ b/tests/behaviour/add.py @@ -1,5 +1,9 @@ a = 1 b = 2 c = a + b - print(c) + +a = 1 +b = a + 2 +a += 1 +print(a, b) \ No newline at end of file diff --git a/tests/behaviour/methods.py b/tests/behaviour/methods.py index b39061e..e69de29 100644 --- a/tests/behaviour/methods.py +++ b/tests/behaviour/methods.py @@ -1,3 +0,0 @@ -a = [1, 2] -a.append(3) -print(a) \ No newline at end of file diff --git a/tests/cases.zig b/tests/cases.zig index 188b344..414d044 100644 --- a/tests/cases.zig +++ b/tests/cases.zig @@ -1,14 +1,17 @@ const std = @import("std"); +const matrix = @import("matrix.zig"); const Build = std.Build; const Step = Build.Step; -const builtins = @import("builtins/builtins.zig"); -const real_cases = @import("real_cases/real_cases.zig"); -const behaviour = @import("behaviour/behaviour.zig"); +const test_dirs: []const []const u8 = &.{ + "builtins", + "real_cases", + "behaviour", +}; pub fn addCases(b: *Build, exe: *Step.Compile, parent_step: *Step) !void { - parent_step.dependOn(try builtins.addCases(b, exe)); - parent_step.dependOn(try real_cases.addCases(b, exe)); - parent_step.dependOn(try behaviour.addCases(b, exe)); + for (test_dirs) |dir| { + parent_step.dependOn(try matrix.addCases(b, dir, exe)); + } } diff --git a/tests/matrix.zig b/tests/matrix.zig index a15a506..ecf3ca2 100644 --- a/tests/matrix.zig +++ b/tests/matrix.zig @@ -3,38 +3,45 @@ const File = std.fs.File; const Allocator = std.mem.Allocator; const Step = std.Build.Step; +const Run = std.Build.Step.Run; const MatrixError = error{}; -const path_offset: []const u8 = "tests/"; +pub fn addCases(b: *std.Build, test_dir: []const u8, exe: *Step.Compile) !*Step { + const 
root_path = try b.build_root.join(b.allocator, &.{ "tests", test_dir }); -pub fn addCase(b: *std.Build, name: []const u8, exe: *Step.Compile) !*Step { - const test_path = b.fmt("{s}{s}", .{ path_offset, name }); - const test_step = b.step(name, ""); + const dir_step = b.step(std.fs.path.basename(test_dir), b.fmt("Tests the files in {s}", .{test_dir})); + const files = try getPyFilesInDir(root_path, b.allocator); + for (files) |test_file| { + const test_run = try addCase(b, test_file, exe); + dir_step.dependOn(&test_run.step); + } + + return dir_step; +} + +pub fn addCase(b: *std.Build, path: []const u8, exe: *Step.Compile) !*Run { // Compare against CPython output. const result = try std.process.Child.run(.{ .allocator = b.allocator, .argv = &.{ "python3.10", - test_path, + path, }, - .cwd = ".", + .cwd = std.fs.path.dirname(path).?, .expand_arg0 = .expand, }); const run_cmd = b.addRunArtifact(exe); - run_cmd.addArg(test_path); + run_cmd.addArg(path); run_cmd.expectStdOutEqual(result.stdout); - - test_step.dependOn(&run_cmd.step); - - return test_step; + return run_cmd; } -pub fn getPyFilesInDir(dir_path: []const u8, ally: Allocator) ![]const []const u8 { - var files = std.ArrayList([]const u8).init(ally); +pub fn getPyFilesInDir(dir_path: []const u8, allocator: Allocator) ![]const []const u8 { + var files = std.ArrayList([]const u8).init(allocator); defer files.deinit(); var dir = try std.fs.cwd().openDir(dir_path, .{ .iterate = true }); @@ -46,7 +53,11 @@ pub fn getPyFilesInDir(dir_path: []const u8, ally: Allocator) ![]const []const u if (!std.mem.endsWith(u8, file.name, ".py")) { continue; } - try files.append(try ally.dupe(u8, file.name)); + try files.append(try std.mem.concat(allocator, u8, &.{ + dir_path, + &.{std.fs.path.sep}, + file.name, + })); } return try files.toOwnedSlice(); diff --git a/tools/opcode2zig.zig b/tools/opcode2zig.zig index bfd6943..28a0f8d 100644 --- a/tools/opcode2zig.zig +++ b/tools/opcode2zig.zig @@ -2,8 +2,6 @@ const std = 
@import("std"); -const allocator = std.heap.page_allocator; - fn usage() void { const writer = std.io.getStdOut().writer(); @@ -15,12 +13,21 @@ fn usage() void { writer.writeAll(usage_string) catch @panic("failed to print usage"); } +const skip_names = std.StaticStringMap(void).initComptime(.{ + .{ "HAVE_ARGUMENT", {} }, +}); + pub fn main() !void { + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + defer _ = gpa.deinit(); + + const allocator = gpa.allocator(); + const args = try std.process.argsAlloc(allocator); if (args.len < 3) { usage(); - std.os.exit(0); + std.posix.exit(0); } const file_name = args[1]; @@ -49,7 +56,7 @@ pub fn main() !void { var out_buf = std.ArrayList(u8).init(allocator); const writer = out_buf.writer(); defer { - std.fs.cwd().writeFile(output_name, out_buf.items) catch @panic("fail to write out_buf"); + std.fs.cwd().writeFile(.{ .sub_path = output_name, .data = out_buf.items }) catch @panic("fail to write out_buf"); } try writer.print("// This file was autogenerated by tools/opcode2zig.zig\n", .{}); try writer.print("// DO NOT EDIT\n\n", .{}); @@ -84,9 +91,7 @@ pub fn main() !void { } if ((name == null) or (value == null)) continue; - - if (std.mem.eql(u8, name.?, "HAVE_ARGUMENT")) continue; - + if (skip_names.get(name.?)) |_| continue; _ = std.fmt.parseInt(u32, value.?, 10) catch continue; try writer.print("\t{s} = {s},\n", .{ name.?, value.? }); diff --git a/vendor/README.md b/vendor/README.md new file mode 100644 index 0000000..f09998e --- /dev/null +++ b/vendor/README.md @@ -0,0 +1,3 @@ +# `vendor` + +These are all the different vendored files. \ No newline at end of file diff --git a/includes/opcode.h b/vendor/opcode.h similarity index 100% rename from includes/opcode.h rename to vendor/opcode.h